AUTHORS formatting clean-up

[perl5.git] / pp.c
diff --git a/pp.c b/pp.c

index 6925f49..a5ce2a8 100644 (file)
--- a/pp.c
+++ b/pp.c
@@ -3479,7 +3479,6 @@ PP(pp_ucfirst)
      STRLEN tculen;  /* tculen is the byte length of the freshly titlecased (or
                      * lowercased) character stored in tmpbuf.  May be either
                      * UTF-8 or not, but in either case is the number of bytes */
-    bool tainted = FALSE;
  
      s = (const U8*)SvPV_const(source, slen);
  
@@ -3503,12 +3502,10 @@ PP(pp_ucfirst)
         doing_utf8 = TRUE;
          ulen = UTF8SKIP(s);
          if (op_type == OP_UCFIRST) {
-           _to_utf8_title_flags(s, tmpbuf, &tculen,
-                                cBOOL(IN_LOCALE_RUNTIME), &tainted);
+           _to_utf8_title_flags(s, tmpbuf, &tculen, IN_LOCALE_RUNTIME);
         }
          else {
-           _to_utf8_lower_flags(s, tmpbuf, &tculen,
-                                cBOOL(IN_LOCALE_RUNTIME), &tainted);
+           _to_utf8_lower_flags(s, tmpbuf, &tculen, IN_LOCALE_RUNTIME);
         }
  
          /* we can't do in-place if the length changes.  */
@@ -3531,17 +3528,27 @@ PP(pp_ucfirst)
         }
         /* is ucfirst() */
         else if (IN_LOCALE_RUNTIME) {
-           *tmpbuf = toUPPER_LC(*s);   /* This would be a bug if any locales
-                                        * have upper and title case different
-                                        */
+            if (IN_UTF8_CTYPE_LOCALE) {
+                goto do_uni_rules;
+            }
+
+            *tmpbuf = (U8) toUPPER_LC(*s); /* This would be a bug if any
+                                              locales have upper and title case
+                                              different */
         }
         else if (! IN_UNI_8_BIT) {
             *tmpbuf = toUPPER(*s);      /* Returns caseless for non-ascii, or
                                          * on EBCDIC machines whatever the
                                          * native function does */
         }
-       else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */
-           UV title_ord = _to_upper_title_latin1(*s, tmpbuf, &tculen, 's');
+        else {
+            /* Here, is ucfirst non-UTF-8, not in locale (unless that locale is
+             * UTF-8, which we treat as not in locale), and cased latin1 */
+           UV title_ord;
+
+      do_uni_rules:
+
+           title_ord = _to_upper_title_latin1(*s, tmpbuf, &tculen, 's');
             if (tculen > 1) {
                 assert(tculen == 2);
  
@@ -3642,17 +3649,9 @@ PP(pp_ucfirst)
             SvCUR_set(dest, need - 1);
         }
  
-       if (tainted) {
-           TAINT;
-           SvTAINTED_on(dest);
-       }
      }
      else {  /* Neither source nor dest are in or need to be UTF-8 */
         if (slen) {
-           if (IN_LOCALE_RUNTIME) {
-               TAINT;
-               SvTAINTED_on(dest);
-           }
             if (inplace) {  /* in-place, only need to change the 1st char */
                 *d = *tmpbuf;
             }
@@ -3680,6 +3679,10 @@ PP(pp_ucfirst)
             SvCUR_set(dest, need - 1);
         }
      }
+    if (IN_LOCALE_RUNTIME) {
+        TAINT;
+        SvTAINTED_on(dest);
+    }
      if (dest != source && SvTAINTED(source))
         SvTAINT(dest);
      SvSETMAGIC(dest);
@@ -3707,15 +3710,20 @@ PP(pp_uc)
         (SvTEMP(source) && !SvSMAGICAL(source) && SvREFCNT(source) == 1))
         && !SvREADONLY(source) && SvPOK(source)
         && !DO_UTF8(source)
-       && (IN_LOCALE_RUNTIME || ! IN_UNI_8_BIT)) {
-
-       /* We can convert in place.  The reason we can't if in UNI_8_BIT is to
-        * make the loop tight, so we overwrite the source with the dest before
-        * looking at it, and we need to look at the original source
-        * afterwards.  There would also need to be code added to handle
-        * switching to not in-place in midstream if we run into characters
-        * that change the length.
-        */
+       && ((IN_LOCALE_RUNTIME)
+            ? ! IN_UTF8_CTYPE_LOCALE
+            : ! IN_UNI_8_BIT))
+    {
+
+        /* We can convert in place.  The reason we can't if in UNI_8_BIT is to
+         * make the loop tight, so we overwrite the source with the dest before
+         * looking at it, and we need to look at the original source
+         * afterwards.  There would also need to be code added to handle
+         * switching to not in-place in midstream if we run into characters
+         * that change the length.  Since being in locale overrides UNI_8_BIT,
+         * the latter becomes irrelevant in the above test; instead for
+         * locale, the size can't normally change, except if the locale is a
+         * UTF-8 one */
         dest = source;
         s = d = (U8*)SvPV_force_nomg(source, len);
         min = len + 1;
@@ -3740,7 +3748,6 @@ PP(pp_uc)
      if (DO_UTF8(source)) {
         const U8 *const send = s + len;
         U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
-       bool tainted = FALSE;
  
         /* All occurrences of these are to be moved to follow any other marks.
          * This is context-dependent.  We may not be passed enough context to
@@ -3770,8 +3777,7 @@ PP(pp_uc)
               * and copy it to the output buffer */
  
              u = UTF8SKIP(s);
-            uv = _to_utf8_upper_flags(s, tmpbuf, &ulen,
-                                     cBOOL(IN_LOCALE_RUNTIME), &tainted);
+            uv = _to_utf8_upper_flags(s, tmpbuf, &ulen, IN_LOCALE_RUNTIME);
  #define GREEK_CAPITAL_LETTER_IOTA 0x0399
  #define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
              if (uv == GREEK_CAPITAL_LETTER_IOTA
@@ -3806,10 +3812,6 @@ PP(pp_uc)
         *d = '\0';
  
         SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
-       if (tainted) {
-           TAINT;
-           SvTAINTED_on(dest);
-       }
      }
      else {     /* Not UTF-8 */
         if (len) {
@@ -3819,10 +3821,11 @@ PP(pp_uc)
              * latin1 as having case; otherwise the latin1 casing.  Do the
              * whole thing in a tight loop, for speed, */
             if (IN_LOCALE_RUNTIME) {
-               TAINT;
-               SvTAINTED_on(dest);
+                if (IN_UTF8_CTYPE_LOCALE) {
+                    goto do_uni_rules;
+                }
                 for (; s < send; d++, s++)
-                   *d = toUPPER_LC(*s);
+                    *d = (U8) toUPPER_LC(*s);
             }
             else if (! IN_UNI_8_BIT) {
                 for (; s < send; d++, s++) {
@@ -3830,6 +3833,7 @@ PP(pp_uc)
                 }
             }
             else {
+          do_uni_rules:
                 for (; s < send; d++, s++) {
                     *d = toUPPER_LATIN1_MOD(*s);
                     if (LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) {
@@ -3918,6 +3922,10 @@ PP(pp_uc)
             SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
         }
      } /* End of isn't utf8 */
+    if (IN_LOCALE_RUNTIME) {
+        TAINT;
+        SvTAINTED_on(dest);
+    }
      if (dest != source && SvTAINTED(source))
         SvTAINT(dest);
      SvSETMAGIC(dest);
@@ -3970,14 +3978,12 @@ PP(pp_lc)
      if (DO_UTF8(source)) {
         const U8 *const send = s + len;
         U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
-       bool tainted = FALSE;
  
         while (s < send) {
             const STRLEN u = UTF8SKIP(s);
             STRLEN ulen;
  
-           _to_utf8_lower_flags(s, tmpbuf, &ulen,
-                                cBOOL(IN_LOCALE_RUNTIME), &tainted);
+           _to_utf8_lower_flags(s, tmpbuf, &ulen, IN_LOCALE_RUNTIME);
  
             /* Here is where we would do context-sensitive actions.  See the
              * commit message for 86510fb15 for why there isn't any */
@@ -4007,10 +4013,6 @@ PP(pp_lc)
         SvUTF8_on(dest);
         *d = '\0';
         SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
-       if (tainted) {
-           TAINT;
-           SvTAINTED_on(dest);
-       }
      } else {   /* Not utf8 */
         if (len) {
             const U8 *const send = s + len;
@@ -4018,12 +4020,10 @@ PP(pp_lc)
             /* Use locale casing if in locale; regular style if not treating
              * latin1 as having case; otherwise the latin1 casing.  Do the
              * whole thing in a tight loop, for speed, */
-           if (IN_LOCALE_RUNTIME) {
-               TAINT;
-               SvTAINTED_on(dest);
+            if (IN_LOCALE_RUNTIME) {
                 for (; s < send; d++, s++)
                     *d = toLOWER_LC(*s);
-           }
+            }
             else if (! IN_UNI_8_BIT) {
                 for (; s < send; d++, s++) {
                     *d = toLOWER(*s);
@@ -4040,6 +4040,10 @@ PP(pp_lc)
             SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
         }
      }
+    if (IN_LOCALE_RUNTIME) {
+        TAINT;
+        SvTAINTED_on(dest);
+    }
      if (dest != source && SvTAINTED(source))
         SvTAINT(dest);
      SvSETMAGIC(dest);
@@ -4164,12 +4168,11 @@ PP(pp_fc)
  
      send = s + len;
      if (DO_UTF8(source)) { /* UTF-8 flagged string. */
-        bool tainted = FALSE;
          while (s < send) {
              const STRLEN u = UTF8SKIP(s);
              STRLEN ulen;
  
-            _to_utf8_fold_flags(s, tmpbuf, &ulen, flags, &tainted);
+            _to_utf8_fold_flags(s, tmpbuf, &ulen, flags);
  
              if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
                  const UV o = d - (U8*)SvPVX_const(dest);
@@ -4182,23 +4185,21 @@ PP(pp_fc)
              s += u;
          }
          SvUTF8_on(dest);
-       if (tainted) {
-           TAINT;
-           SvTAINTED_on(dest);
-       }
      } /* Unflagged string */
      else if (len) {
          if ( IN_LOCALE_RUNTIME ) { /* Under locale */
-            TAINT;
-            SvTAINTED_on(dest);
+            if (IN_UTF8_CTYPE_LOCALE) {
+                goto do_uni_folding;
+            }
              for (; s < send; d++, s++)
-                *d = toFOLD_LC(*s);
+                *d = (U8) toFOLD_LC(*s);
          }
          else if ( !IN_UNI_8_BIT ) { /* Under nothing, or bytes */
              for (; s < send; d++, s++)
                  *d = toFOLD(*s);
          }
          else {
+      do_uni_folding:
              /* For ASCII and the Latin-1 range, there's only two troublesome
               * folds, \x{DF} (\N{LATIN SMALL LETTER SHARP S}), which under full
               * casefolding becomes 'ss'; and \x{B5} (\N{MICRO SIGN}), which
@@ -4265,6 +4266,10 @@ PP(pp_fc)
      *d = '\0';
      SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
  
+    if (IN_LOCALE_RUNTIME) {
+        TAINT;
+        SvTAINTED_on(dest);
+    }
      if (SvTAINTED(source))
         SvTAINT(dest);
      SvSETMAGIC(dest);
@@ -5639,7 +5644,7 @@ PP(pp_split)
      else if (do_utf8 == (RX_UTF8(rx) != 0) &&
              (RX_EXTFLAGS(rx) & RXf_USE_INTUIT) && !RX_NPARENS(rx)
              && (RX_EXTFLAGS(rx) & RXf_CHECK_ALL)
-            && !(RX_EXTFLAGS(rx) & RXf_ANCH)) {
+             && !(RX_EXTFLAGS(rx) & RXf_IS_ANCHORED)) {
         const int tail = (RX_EXTFLAGS(rx) & RXf_INTUIT_TAIL);
         SV * const csv = CALLREG_INTUIT_STRING(rx);