Convert some calls to test for malformations

author Karl Williamson <khw@cpan.org>
Fri, 16 Dec 2016 03:11:00 +0000 (20:11 -0700)
committer Karl Williamson <khw@cpan.org>
Sat, 24 Dec 2016 05:36:34 +0000 (22:36 -0700)
diff --git a/locale.c b/locale.c

index 07f599c..b86077f 100644 (file)
--- a/locale.c
+++ b/locale.c
@@ -1723,13 +1723,14 @@ Perl__mem_collxfrm(pTHX_ const char *input_string,
                  {
                      STRLEN i;
                      STRLEN d= 0;
+                    char * e = (char *) t + len;
  
                      for (i = 0; i < len; i+= UTF8SKIP(t + i)) {
                          U8 cur_char = t[i];
                          if (UTF8_IS_INVARIANT(cur_char)) {
                              s[d++] = cur_char;
                          }
-                        else if (UTF8_IS_DOWNGRADEABLE_START(cur_char)) {
+                        else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(t + i, e)) {
                              s[d++] = EIGHT_BIT_UTF8_TO_NATIVE(cur_char, t[i+1]);
                          }
                          else {  /* Replace illegal cp with highest collating
diff --git a/pp.c b/pp.c

index 26b1cb3..9dad252 100644 (file)
--- a/pp.c
+++ b/pp.c
@@ -4404,7 +4404,7 @@ PP(pp_quotemeta)
                         to_quote = TRUE;
                     }
                 }
-               else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
+               else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(s, s + len)) {
                     if (
  #ifdef USE_LOCALE_CTYPE
                     /* In locale, we quote all non-ASCII Latin1 chars.
diff --git a/regexec.c b/regexec.c

index 8b5caa7..d9898cb 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -2429,7 +2429,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                      if ((UTF8_IS_INVARIANT(*s)
                           && to_complement ^ cBOOL(_generic_isCC((U8) *s,
                                                                  classnum)))
-                        || (UTF8_IS_DOWNGRADEABLE_START(*s)
+                        || (   UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(s, strend)
                              && to_complement ^ cBOOL(
                                  _generic_isCC(EIGHT_BIT_UTF8_TO_NATIVE(*s,
                                                                        *(s + 1)),
@@ -6373,8 +6373,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                  break;
              }
  
-            if (! UTF8_IS_DOWNGRADEABLE_START(nextchr)) { /* An above Latin-1 code point */
-                _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(locinput, reginfo->strend);
+            if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(locinput, reginfo->strend)) {
+                /* An above Latin-1 code point, or malformed */
+                _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(locinput,
+                                                       reginfo->strend);
                  goto utf8_posix_above_latin1;
              }
  
@@ -6458,7 +6460,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                  }
                  locinput++;
              }
-            else if (UTF8_IS_DOWNGRADEABLE_START(nextchr)) {
+            else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(locinput, reginfo->strend)) {
                  if (! (to_complement
                         ^ cBOOL(_generic_isCC(EIGHT_BIT_UTF8_TO_NATIVE(nextchr,
                                                                 *(locinput + 1)),
diff --git a/utf8.c b/utf8.c

index 5b98352..7f3ea11 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -3037,7 +3037,7 @@ S_check_locale_boundary_crossing(pTHX_ const U8* const p, const UV result, U8* c
              return L1_func(*p, ustrp, lenp, L1_func_extra_param);            \
          }                                                                    \
      }                                                                        \
-    else if UTF8_IS_DOWNGRADEABLE_START(*p) {                                \
+    else if UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, p + UTF8SKIP(p)) {            \
          if (flags & (locale_flags)) {                                        \
              result = LC_L1_change_macro(EIGHT_BIT_UTF8_TO_NATIVE(*p,         \
                                                                   *(p+1)));   \
author	Karl Williamson <khw@cpan.org>
	Fri, 16 Dec 2016 03:11:00 +0000 (20:11 -0700)
committer	Karl Williamson <khw@cpan.org>
	Sat, 24 Dec 2016 05:36:34 +0000 (22:36 -0700)
locale.c		patch \| blob \| blame \| history
pp.c		patch \| blob \| blame \| history
regexec.c		patch \| blob \| blame \| history
utf8.c		patch \| blob \| blame \| history