From 2b1f9c7143e15e2b934249f7fadadf156e31d40e Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 22 Feb 2018 21:45:42 -0700 Subject: [PATCH] PATCH: [perl #132900] Blead Breaks CPAN: FELIPE/Crypt-Perl The root cause of this was using a 'char' where it should have been 'U8'. I changed the signatures so that all the related functions take and return U8's, and the compiler detects what should be cast to/from char. The functions all deal with byte bit patterns, so unsigned is the appropriate declaration. --- embed.fnc | 10 +++++----- proto.h | 4 ++-- regexec.c | 28 ++++++++++++++-------------- t/re/re_tests | 2 +- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/embed.fnc b/embed.fnc index 3c66fa4..45c37b6 100644 --- a/embed.fnc +++ b/embed.fnc @@ -2540,12 +2540,12 @@ ERp |bool |_is_grapheme |NN const U8 * strbeg|NN const U8 * s|NN const U8 *stren #if defined(PERL_IN_REGEXEC_C) ERs |bool |isFOO_utf8_lc |const U8 classnum|NN const U8* character -ERns |char *|find_next_ascii|NN char* s|NN const char * send|const bool is_utf8 -ERns |char *|find_next_non_ascii|NN char* s|NN const char * send|const bool is_utf8 -ERns |char * |find_next_masked|NN char * s \ - |NN const char * send \ +ERns |char * |find_next_ascii|NN char* s|NN const char * send|const bool is_utf8 +ERns |char * |find_next_non_ascii|NN char* s|NN const char * send|const bool is_utf8 +ERns |U8 * |find_next_masked|NN U8 * s \ + |NN const U8 * send \ |const U8 byte|const U8 mask -ERns |char *|find_span_end |NN char* s|NN const char * send|const char span_byte +ERns |U8 *|find_span_end |NN U8* s|NN const U8 * send|const U8 span_byte ERns |U8 *|find_span_end_mask|NN U8 * s|NN const U8 * send \ |const U8 span_byte|const U8 mask ERs |SSize_t|regmatch |NN regmatch_info *reginfo|NN char *startpos|NN regnode *prog diff --git a/proto.h b/proto.h index d6c36a0..c858fd3 100644 --- a/proto.h +++ b/proto.h @@ -5595,7 +5595,7 @@ STATIC char * S_find_next_ascii(char* s, const char * send, const bool is_utf8) #define PERL_ARGS_ASSERT_FIND_NEXT_ASCII \ assert(s); assert(send) -STATIC char * S_find_next_masked(char * s, const char * send, const U8 byte, const U8 mask) +STATIC U8 * S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask) __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_FIND_NEXT_MASKED \ assert(s); assert(send) @@ -5605,7 +5605,7 @@ STATIC char * S_find_next_non_ascii(char* s, const char * send, const bool is_ut #define PERL_ARGS_ASSERT_FIND_NEXT_NON_ASCII \ assert(s); assert(send) -STATIC char * S_find_span_end(char* s, const char * send, const char span_byte) +STATIC U8 * S_find_span_end(U8* s, const U8 * send, const U8 span_byte) __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_FIND_SPAN_END \ assert(s); assert(send) diff --git a/regexec.c b/regexec.c index 1cda2e8..750ddb5 100644 --- a/regexec.c +++ b/regexec.c @@ -676,8 +676,8 @@ S_find_next_non_ascii(char * s, const char * send, const bool utf8_target) } -STATIC char * -S_find_span_end(char * s, const char * send, const char span_byte) +STATIC U8 * +S_find_span_end(U8 * s, const U8 * send, const U8 span_byte) { /* Returns the position of the first byte in the sequence between 's' and * 'send-1' inclusive that isn't 'span_byte'; returns 'send' if none found. @@ -741,8 +741,8 @@ S_find_span_end(char * s, const char * send, const char span_byte) return s; } -STATIC char * -S_find_next_masked(char * s, const char * send, const U8 byte, const U8 mask) +STATIC U8 * +S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask) { /* Returns the position of the first byte in the sequence between 's' * and 'send-1' inclusive that when ANDed with 'mask' yields 'byte'; @@ -761,7 +761,7 @@ S_find_next_masked(char * s, const char * send, const U8 byte, const U8 mask) PERL_UINTMAX_T word_complemented, mask_word; while (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK) { - if (((* (U8 *) s) & mask) == byte) { + if (((*s) & mask) == byte) { return s; } s++; @@ -804,7 +804,7 @@ S_find_next_masked(char * s, const char * send, const U8 byte, const U8 mask) } while (s < send) { - if (((* (U8 *) s) & mask) == byte) { + if (((*s) & mask) == byte) { return s; } s++; @@ -834,7 +834,7 @@ S_find_span_end_mask(U8 * s, const U8 * send, const U8 span_byte, const U8 mask) PERL_UINTMAX_T span_word, mask_word; while (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK) { - if (((* (U8 *) s) & mask) != span_byte) { + if (((*s) & mask) != span_byte) { return s; } s++; @@ -861,7 +861,7 @@ S_find_span_end_mask(U8 * s, const U8 * send, const U8 span_byte, const U8 mask) } while (s < send) { - if (((* (U8 *) s) & mask) != span_byte) { + if (((*s) & mask) != span_byte) { return s; } s++; @@ -1951,7 +1951,7 @@ STMT_START { * there is no such occurrence. */ #define REXEC_FBC_FIND_NEXT_SCAN(UTF8, f) \ while (s < strend) { \ - s = f; \ + s = (f); \ if (s >= strend) { \ break; \ } \ @@ -2250,7 +2250,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, case ANYOFM: /* ARG() is the base byte; FLAGS() the mask byte */ /* UTF-8ness doesn't matter, so use 0 */ REXEC_FBC_FIND_NEXT_SCAN(0, - find_next_masked(s, strend, ARG(c), FLAGS(c))); + (char *) find_next_masked((U8 *) s, (U8 *) strend, ARG(c), FLAGS(c))); break; case EXACTFAA_NO_TRIE: /* This node only generated for non-utf8 patterns */ @@ -2364,7 +2364,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, if (LIKELY(PL_bitcount[bits_differing] == 1)) { bits_differing = ~ bits_differing; while (s <= e) { - s = find_next_masked(s, e + 1, + s = (char *) find_next_masked((U8 *) s, (U8 *) e + 1, (c1 & bits_differing), bits_differing); if (s > e) { break; @@ -9285,7 +9285,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p, * since here, to match at all, 1 char == 1 byte */ loceol = scan + max; } - scan = find_span_end(scan, loceol, (U8) c); + scan = (char *) find_span_end((U8 *) scan, (U8 *) loceol, (U8) c); } else if (reginfo->is_utf8_pat) { if (utf8_target) { @@ -9307,7 +9307,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p, /* Target isn't utf8; convert the character in the UTF-8 * pattern to non-UTF8, and do a simple find */ c = EIGHT_BIT_UTF8_TO_NATIVE(c, *(STRING(p) + 1)); - scan = find_span_end(scan, loceol, (U8) c); + scan = (char *) find_span_end((U8 *) scan, (U8 *) loceol, (U8) c); } /* else pattern char is above Latin1, can't possibly match the non-UTF-8 target */ } @@ -9405,7 +9405,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p, } } else if (c1 == c2) { - scan = find_span_end(scan, loceol, c1); + scan = (char *) find_span_end((U8 *) scan, (U8 *) loceol, (U8) c1); } else { /* See comments in regmatch() CURLY_B_min_known_fail. We avoid diff --git a/t/re/re_tests b/t/re/re_tests index 61b8c87..f38743d 100644 --- a/t/re/re_tests +++ b/t/re/re_tests @@ -1989,7 +1989,7 @@ AB\s+\x{100} AB \x{100}X y - - ([[:ascii:]]+)\x81 a\x80b\x81 y $& b\x81 [[:^ascii:]]+b \x80a\x81b y $& \x81b [[:^ascii:]]+b \x80a\x81\x{100}b y $& \x81\x{100}b - +/\A\x80+\z/ \x80\x80\x80\x80\x80\x80\x80\x80\x80 y $& \x80\x80\x80\x80\x80\x80\x80\x80\x80 # [perl #132900] # Keep these lines at the end of the file # vim: softtabstop=0 noexpandtab -- 1.8.3.1