+static void
+S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
+{
+ dVAR;
+ STRLEN len;
+ char *s;
+
+ PERL_ARGS_ASSERT_DO_CHOMP;
+
+ if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs)))
+ return;
+ if (SvTYPE(sv) == SVt_PVAV) {
+ I32 i;
+ AV *const av = MUTABLE_AV(sv);
+ const I32 max = AvFILL(av);
+
+ for (i = 0; i <= max; i++) {
+ sv = MUTABLE_SV(av_fetch(av, i, FALSE));
+ if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
+ do_chomp(retval, sv, chomping);
+ }
+ return;
+ }
+ else if (SvTYPE(sv) == SVt_PVHV) {
+ HV* const hv = MUTABLE_HV(sv);
+ HE* entry;
+ (void)hv_iterinit(hv);
+ while ((entry = hv_iternext(hv)))
+ do_chomp(retval, hv_iterval(hv,entry), chomping);
+ return;
+ }
+ else if (SvREADONLY(sv)) {
+ if (SvFAKE(sv)) {
+ /* SV is copy-on-write */
+ sv_force_normal_flags(sv, 0);
+ }
+ if (SvREADONLY(sv))
+ Perl_croak_no_modify(aTHX);
+ }
+
+ if (PL_encoding) {
+ if (!SvUTF8(sv)) {
+ /* XXX, here sv is utf8-ized as a side-effect!
+ If encoding.pm is used properly, almost string-generating
+ operations, including literal strings, chr(), input data, etc.
+ should have been utf8-ized already, right?
+ */
+ sv_recode_to_utf8(sv, PL_encoding);
+ }
+ }
+
+ s = SvPV(sv, len);
+ if (chomping) {
+ char *temp_buffer = NULL;
+ SV *svrecode = NULL;
+
+ if (s && len) {
+ s += --len;
+ if (RsPARA(PL_rs)) {
+ if (*s != '\n')
+ goto nope;
+ ++SvIVX(retval);
+ while (len && s[-1] == '\n') {
+ --len;
+ --s;
+ ++SvIVX(retval);
+ }
+ }
+ else {
+ STRLEN rslen, rs_charlen;
+ const char *rsptr = SvPV_const(PL_rs, rslen);
+
+ rs_charlen = SvUTF8(PL_rs)
+ ? sv_len_utf8(PL_rs)
+ : rslen;
+
+ if (SvUTF8(PL_rs) != SvUTF8(sv)) {
+ /* Assumption is that rs is shorter than the scalar. */
+ if (SvUTF8(PL_rs)) {
+ /* RS is utf8, scalar is 8 bit. */
+ bool is_utf8 = TRUE;
+ temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
+ &rslen, &is_utf8);
+ if (is_utf8) {
+ /* Cannot downgrade, therefore cannot possibly match
+ */
+ assert (temp_buffer == rsptr);
+ temp_buffer = NULL;
+ goto nope;
+ }
+ rsptr = temp_buffer;
+ }
+ else if (PL_encoding) {
+ /* RS is 8 bit, encoding.pm is used.
+ * Do not recode PL_rs as a side-effect. */
+ svrecode = newSVpvn(rsptr, rslen);
+ sv_recode_to_utf8(svrecode, PL_encoding);
+ rsptr = SvPV_const(svrecode, rslen);
+ rs_charlen = sv_len_utf8(svrecode);
+ }
+ else {
+ /* RS is 8 bit, scalar is utf8. */
+ temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
+ rsptr = temp_buffer;
+ }
+ }
+ if (rslen == 1) {
+ if (*s != *rsptr)
+ goto nope;
+ ++SvIVX(retval);
+ }
+ else {
+ if (len < rslen - 1)
+ goto nope;
+ len -= rslen - 1;
+ s -= rslen - 1;
+ if (memNE(s, rsptr, rslen))
+ goto nope;
+ SvIVX(retval) += rs_charlen;
+ }
+ }
+ s = SvPV_force_nolen(sv);
+ SvCUR_set(sv, len);
+ *SvEND(sv) = '\0';
+ SvNIOK_off(sv);
+ SvSETMAGIC(sv);
+ }
+ nope:
+
+ SvREFCNT_dec(svrecode);
+
+ Safefree(temp_buffer);
+ } else {
+ if (len && !SvPOK(sv))
+ s = SvPV_force_nomg(sv, len);
+ if (DO_UTF8(sv)) {
+ if (s && len) {
+ char * const send = s + len;
+ char * const start = s;
+ s = send - 1;
+ while (s > start && UTF8_IS_CONTINUATION(*s))
+ s--;
+ if (is_utf8_string((U8*)s, send - s)) {
+ sv_setpvn(retval, s, send - s);
+ *s = '\0';
+ SvCUR_set(sv, s - start);
+ SvNIOK_off(sv);
+ SvUTF8_on(retval);
+ }
+ }
+ else
+ sv_setpvs(retval, "");
+ }
+ else if (s && len) {
+ s += --len;
+ sv_setpvn(retval, s, 1);
+ *s = '\0';
+ SvCUR_set(sv, len);
+ SvUTF8_off(sv);
+ SvNIOK_off(sv);
+ }
+ else
+ sv_setpvs(retval, "");
+ SvSETMAGIC(sv);
+ }
+}
+