From 2b1f9c7143e15e2b934249f7fadadf156e31d40e Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Thu, 22 Feb 2018 21:45:42 -0700
Subject: [PATCH] PATCH: [perl #132900] Blead Breaks CPAN: FELIPE/Crypt-Perl

The root cause of this was using a 'char' where it should have been a
'U8'.  I changed the signatures so that all the related functions take
and return U8s, and the compiler now detects what needs to be cast
to/from char.  The functions all deal with byte bit patterns, so
unsigned is the appropriate declaration.
---
 embed.fnc     | 10 +++++-----
 proto.h       |  4 ++--
 regexec.c     | 28 ++++++++++++++--------------
 t/re/re_tests |  2 +-
 4 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/embed.fnc b/embed.fnc
index 3c66fa4..45c37b6 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -2540,12 +2540,12 @@ ERp	|bool	|_is_grapheme	|NN const U8 * strbeg|NN const U8 * s|NN const U8 *stren
 
 #if defined(PERL_IN_REGEXEC_C)
 ERs	|bool	|isFOO_utf8_lc	|const U8 classnum|NN const U8* character
-ERns	|char *|find_next_ascii|NN char* s|NN const char * send|const bool is_utf8
-ERns	|char *|find_next_non_ascii|NN char* s|NN const char * send|const bool is_utf8
-ERns	|char *	|find_next_masked|NN char * s				\
-				 |NN const char * send			\
+ERns	|char *	|find_next_ascii|NN char* s|NN const char * send|const bool is_utf8
+ERns	|char *	|find_next_non_ascii|NN char* s|NN const char * send|const bool is_utf8
+ERns	|U8 *	|find_next_masked|NN U8 * s				\
+				 |NN const U8 * send			\
 				 |const U8 byte|const U8 mask
-ERns	|char *|find_span_end	|NN char* s|NN const char * send|const char span_byte
+ERns	|U8 *|find_span_end	|NN U8* s|NN const U8 * send|const U8 span_byte
 ERns	|U8 *|find_span_end_mask|NN U8 * s|NN const U8 * send	\
 				|const U8 span_byte|const U8 mask
 ERs	|SSize_t|regmatch	|NN regmatch_info *reginfo|NN char *startpos|NN regnode *prog
diff --git a/proto.h b/proto.h
index d6c36a0..c858fd3 100644
--- a/proto.h
+++ b/proto.h
@@ -5595,7 +5595,7 @@ STATIC char *	S_find_next_ascii(char* s, const char * send, const bool is_utf8)
 #define PERL_ARGS_ASSERT_FIND_NEXT_ASCII	\
 	assert(s); assert(send)
 
-STATIC char *	S_find_next_masked(char * s, const char * send, const U8 byte, const U8 mask)
+STATIC U8 *	S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask)
 			__attribute__warn_unused_result__;
 #define PERL_ARGS_ASSERT_FIND_NEXT_MASKED	\
 	assert(s); assert(send)
@@ -5605,7 +5605,7 @@ STATIC char *	S_find_next_non_ascii(char* s, const char * send, const bool is_ut
 #define PERL_ARGS_ASSERT_FIND_NEXT_NON_ASCII	\
 	assert(s); assert(send)
 
-STATIC char *	S_find_span_end(char* s, const char * send, const char span_byte)
+STATIC U8 *	S_find_span_end(U8* s, const U8 * send, const U8 span_byte)
 			__attribute__warn_unused_result__;
 #define PERL_ARGS_ASSERT_FIND_SPAN_END	\
 	assert(s); assert(send)
diff --git a/regexec.c b/regexec.c
index 1cda2e8..750ddb5 100644
--- a/regexec.c
+++ b/regexec.c
@@ -676,8 +676,8 @@ S_find_next_non_ascii(char * s, const char * send, const bool utf8_target)
 
 }
 
-STATIC char *
-S_find_span_end(char * s, const char * send, const char span_byte)
+STATIC U8 *
+S_find_span_end(U8 * s, const U8 * send, const U8 span_byte)
 {
     /* Returns the position of the first byte in the sequence between 's' and
      * 'send-1' inclusive that isn't 'span_byte'; returns 'send' if none found.
@@ -741,8 +741,8 @@ S_find_span_end(char * s, const char * send, const char span_byte)
     return s;
 }
 
-STATIC char *
-S_find_next_masked(char * s, const char * send, const U8 byte, const U8 mask)
+STATIC U8 *
+S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask)
 {
     /* Returns the position of the first byte in the sequence between 's'
      * and 'send-1' inclusive that when ANDed with 'mask' yields 'byte';
@@ -761,7 +761,7 @@ S_find_next_masked(char * s, const char * send, const U8 byte, const U8 mask)
         PERL_UINTMAX_T word_complemented, mask_word;
 
         while (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK) {
-            if (((* (U8 *) s) & mask) == byte) {
+            if (((*s) & mask) == byte) {
                 return s;
             }
             s++;
@@ -804,7 +804,7 @@ S_find_next_masked(char * s, const char * send, const U8 byte, const U8 mask)
     }
 
     while (s < send) {
-        if (((* (U8 *) s) & mask) == byte) {
+        if (((*s) & mask) == byte) {
             return s;
         }
         s++;
@@ -834,7 +834,7 @@ S_find_span_end_mask(U8 * s, const U8 * send, const U8 span_byte, const U8 mask)
         PERL_UINTMAX_T span_word, mask_word;
 
         while (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK) {
-            if (((* (U8 *) s) & mask) != span_byte) {
+            if (((*s) & mask) != span_byte) {
                 return s;
             }
             s++;
@@ -861,7 +861,7 @@ S_find_span_end_mask(U8 * s, const U8 * send, const U8 span_byte, const U8 mask)
     }
 
     while (s < send) {
-        if (((* (U8 *) s) & mask) != span_byte) {
+        if (((*s) & mask) != span_byte) {
             return s;
         }
         s++;
@@ -1951,7 +1951,7 @@ STMT_START {
  * there is no such occurrence. */
 #define REXEC_FBC_FIND_NEXT_SCAN(UTF8, f)                   \
     while (s < strend) {                                    \
-        s = f;                                              \
+        s = (f);                                            \
         if (s >= strend) {                                  \
             break;                                          \
         }                                                   \
@@ -2250,7 +2250,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
     case ANYOFM:    /* ARG() is the base byte; FLAGS() the mask byte */
         /* UTF-8ness doesn't matter, so use 0 */
         REXEC_FBC_FIND_NEXT_SCAN(0,
-                                 find_next_masked(s, strend, ARG(c), FLAGS(c)));
+         (char *) find_next_masked((U8 *) s, (U8 *) strend, ARG(c), FLAGS(c)));
         break;
 
     case EXACTFAA_NO_TRIE: /* This node only generated for non-utf8 patterns */
@@ -2364,7 +2364,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
             if (LIKELY(PL_bitcount[bits_differing] == 1)) {
                 bits_differing = ~ bits_differing;
                 while (s <= e) {
-                    s = find_next_masked(s, e + 1,
+                    s = (char *) find_next_masked((U8 *) s, (U8 *) e + 1,
                                         (c1 & bits_differing), bits_differing);
                     if (s > e) {
                         break;
@@ -9285,7 +9285,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                  * since here, to match at all, 1 char == 1 byte */
                 loceol = scan + max;
             }
-            scan = find_span_end(scan, loceol, (U8) c);
+            scan = (char *) find_span_end((U8 *) scan, (U8 *) loceol, (U8) c);
 	}
 	else if (reginfo->is_utf8_pat) {
             if (utf8_target) {
@@ -9307,7 +9307,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                 /* Target isn't utf8; convert the character in the UTF-8
                  * pattern to non-UTF8, and do a simple find */
                 c = EIGHT_BIT_UTF8_TO_NATIVE(c, *(STRING(p) + 1));
-                scan = find_span_end(scan, loceol, (U8) c);
+                scan = (char *) find_span_end((U8 *) scan, (U8 *) loceol, (U8) c);
             } /* else pattern char is above Latin1, can't possibly match the
                  non-UTF-8 target */
         }
@@ -9405,7 +9405,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                 }
             }
             else if (c1 == c2) {
-                scan = find_span_end(scan, loceol, c1);
+                scan = (char *) find_span_end((U8 *) scan, (U8 *) loceol, (U8) c1);
             }
             else {
                 /* See comments in regmatch() CURLY_B_min_known_fail.  We avoid
diff --git a/t/re/re_tests b/t/re/re_tests
index 61b8c87..f38743d 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -1989,7 +1989,7 @@ AB\s+\x{100}	AB \x{100}X	y	-	-
 ([[:ascii:]]+)\x81	a\x80b\x81	y	$&	b\x81
 [[:^ascii:]]+b	\x80a\x81b	y	$&	\x81b
 [[:^ascii:]]+b	\x80a\x81\x{100}b	y	$&	\x81\x{100}b
-
+/\A\x80+\z/	\x80\x80\x80\x80\x80\x80\x80\x80\x80	y	$&	\x80\x80\x80\x80\x80\x80\x80\x80\x80		# [perl #132900]
 
 # Keep these lines at the end of the file
 # vim: softtabstop=0 noexpandtab
-- 
1.8.3.1