- if (!utf8 && !SvUTF8(cat)) {
- marked_upgrade(aTHX_ cat, symptr);
- lookahead.flags |= FLAG_DO_UTF8;
- lookahead.strbeg = symptr->strbeg;
- utf8 = 1;
- start = SvPVX(cat);
- cur = start + SvCUR(cat);
- }
- if (howlen == e_star) {
- if (utf8) goto string_copy;
- len = fromlen+1;
- }
- s = aptr;
- end = aptr + fromlen;
- fromlen = datumtype == 'Z' ? len-1 : len;
- while ((I32) fromlen > 0 && s < end) {
- s += UTF8SKIP(s);
- fromlen--;
- }
- if (s > end)
- Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
- if (utf8) {
- len = fromlen;
- if (datumtype == 'Z') len++;
- fromlen = s-aptr;
- len += fromlen;
-
- goto string_copy;
- }
- fromlen = len - fromlen;
- if (datumtype == 'Z') fromlen--;
- if (howlen == e_star) {
- len = fromlen;
- if (datumtype == 'Z') len++;
- }
- GROWING(0, cat, start, cur, len);
- if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
- datumtype | TYPE_IS_PACK))
- Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
- "for '%c', aptr=%p end=%p cur=%p, fromlen=%" UVuf,
- (int)datumtype, aptr, end, cur, (UV)fromlen);
- cur += fromlen;
- len -= fromlen;
- } else if (utf8) {
- if (howlen == e_star) {
- len = fromlen;
- if (datumtype == 'Z') len++;
- }
- if (len <= (I32) fromlen) {
- fromlen = len;
- if (datumtype == 'Z' && fromlen > 0) fromlen--;
- }
- /* assumes a byte expands to at most UTF8_EXPAND bytes on
- upgrade, so:
- expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
- GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
- len -= fromlen;
- while (fromlen > 0) {
- cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
- aptr++;
- fromlen--;
- }
- } else {
- string_copy:
- if (howlen == e_star) {
- len = fromlen;
- if (datumtype == 'Z') len++;
- }
- if (len <= (I32) fromlen) {
- fromlen = len;
- if (datumtype == 'Z' && fromlen > 0) fromlen--;
- }
- GROWING(0, cat, start, cur, len);
- Copy(aptr, cur, fromlen, char);
- cur += fromlen;
- len -= fromlen;
- }
- memset(cur, datumtype == 'A' ? ' ' : '\0', len);
- cur += len;
- SvTAINT(cat);
- break;
- }
- case 'B':
- case 'b': {
- const char *str, *end;
- I32 l, field_len;
- U8 bits;
- bool utf8_source;
- U32 utf8_flags;
-
- fromstr = NEXTFROM;
- str = SvPV_const(fromstr, fromlen);
- end = str + fromlen;
- if (DO_UTF8(fromstr)) {
- utf8_source = TRUE;
- utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
- } else {
- utf8_source = FALSE;
- utf8_flags = 0; /* Unused, but keep compilers happy */
- }
- if (howlen == e_star) len = fromlen;
- field_len = (len+7)/8;
- GROWING(utf8, cat, start, cur, field_len);
- if (len > (I32)fromlen) len = fromlen;
- bits = 0;
- l = 0;
- if (datumtype == 'B')
- while (l++ < len) {
- if (utf8_source) {
- UV val = 0;
- NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
- bits |= val & 1;
- } else bits |= *str++ & 1;
- if (l & 7) bits <<= 1;
- else {
- PUSH_BYTE(utf8, cur, bits);
- bits = 0;
- }
- }
- else
- /* datumtype == 'b' */
- while (l++ < len) {
- if (utf8_source) {
- UV val = 0;
- NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
- if (val & 1) bits |= 0x80;
- } else if (*str++ & 1)
- bits |= 0x80;
- if (l & 7) bits >>= 1;
- else {
- PUSH_BYTE(utf8, cur, bits);
- bits = 0;
- }
- }
- l--;
- if (l & 7) {
- if (datumtype == 'B')
- bits <<= 7 - (l & 7);
- else
- bits >>= 7 - (l & 7);
- PUSH_BYTE(utf8, cur, bits);
- l += 7;
- }
- /* Determine how many chars are left in the requested field */
- l /= 8;
- if (howlen == e_star) field_len = 0;
- else field_len -= l;
- Zero(cur, field_len, char);
- cur += field_len;
- break;
- }
- case 'H':
- case 'h': {
- const char *str, *end;
- I32 l, field_len;
- U8 bits;
- bool utf8_source;
- U32 utf8_flags;
-
- fromstr = NEXTFROM;
- str = SvPV_const(fromstr, fromlen);
- end = str + fromlen;
- if (DO_UTF8(fromstr)) {
- utf8_source = TRUE;
- utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
- } else {
- utf8_source = FALSE;
- utf8_flags = 0; /* Unused, but keep compilers happy */
- }
- if (howlen == e_star) len = fromlen;
- field_len = (len+1)/2;
- GROWING(utf8, cat, start, cur, field_len);
- if (!utf8_source && len > (I32)fromlen) len = fromlen;
- bits = 0;
- l = 0;
- if (datumtype == 'H')
- while (l++ < len) {
- if (utf8_source) {
- UV val = 0;
- NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
- if (val < 256 && isALPHA(val))
- bits |= (val + 9) & 0xf;
- else
- bits |= val & 0xf;
- } else if (isALPHA(*str))
- bits |= (*str++ + 9) & 0xf;
- else
- bits |= *str++ & 0xf;
- if (l & 1) bits <<= 4;
- else {
- PUSH_BYTE(utf8, cur, bits);
- bits = 0;
- }
- }
- else
- while (l++ < len) {
- if (utf8_source) {
- UV val = 0;
- NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
- if (val < 256 && isALPHA(val))
- bits |= ((val + 9) & 0xf) << 4;
- else
- bits |= (val & 0xf) << 4;
- } else if (isALPHA(*str))
- bits |= ((*str++ + 9) & 0xf) << 4;
- else
- bits |= (*str++ & 0xf) << 4;
- if (l & 1) bits >>= 4;
- else {
- PUSH_BYTE(utf8, cur, bits);
- bits = 0;
- }
- }
- l--;
- if (l & 1) {
- PUSH_BYTE(utf8, cur, bits);
- l++;
- }
- /* Determine how many chars are left in the requested field */
- l /= 2;
- if (howlen == e_star) field_len = 0;
- else field_len -= l;
- Zero(cur, field_len, char);
- cur += field_len;
- break;
- }
- case 'c':
- while (len-- > 0) {
- IV aiv;
- fromstr = NEXTFROM;
+ if (!utf8 && !SvUTF8(cat)) {
+ marked_upgrade(aTHX_ cat, symptr);
+ lookahead.flags |= FLAG_DO_UTF8;
+ lookahead.strbeg = symptr->strbeg;
+ utf8 = 1;
+ start = SvPVX(cat);
+ cur = start + SvCUR(cat);
+ }
+ if (howlen == e_star) {
+ if (utf8) goto string_copy;
+ len = fromlen+1;
+ }
+ s = aptr;
+ end = aptr + fromlen;
+ fromlen = datumtype == 'Z' ? len-1 : len;
+ while ((SSize_t) fromlen > 0 && s < end) {
+ s += UTF8SKIP(s);
+ fromlen--;
+ }
+ if (s > end)
+ Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
+ if (utf8) {
+ len = fromlen;
+ if (datumtype == 'Z') len++;
+ fromlen = s-aptr;
+ len += fromlen;
+
+ goto string_copy;
+ }
+ fromlen = len - fromlen;
+ if (datumtype == 'Z') fromlen--;
+ if (howlen == e_star) {
+ len = fromlen;
+ if (datumtype == 'Z') len++;
+ }
+ GROWING(0, cat, start, cur, len);
+ if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
+ datumtype | TYPE_IS_PACK))
+ Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
+ "for '%c', aptr=%p end=%p cur=%p, fromlen=%zu",
+ (int)datumtype, aptr, end, cur, fromlen);
+ cur += fromlen;
+ len -= fromlen;
+ } else if (utf8) {
+ if (howlen == e_star) {
+ len = fromlen;
+ if (datumtype == 'Z') len++;
+ }
+ if (len <= (SSize_t) fromlen) {
+ fromlen = len;
+ if (datumtype == 'Z' && fromlen > 0) fromlen--;
+ }
+ /* assumes a byte expands to at most UTF8_EXPAND bytes on
+ upgrade, so:
+ expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
+ GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
+ len -= fromlen;
+ while (fromlen > 0) {
+ cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
+ aptr++;
+ fromlen--;
+ }
+ } else {
+ string_copy:
+ if (howlen == e_star) {
+ len = fromlen;
+ if (datumtype == 'Z') len++;
+ }
+ if (len <= (SSize_t) fromlen) {
+ fromlen = len;
+ if (datumtype == 'Z' && fromlen > 0) fromlen--;
+ }
+ GROWING(0, cat, start, cur, len);
+ Copy(aptr, cur, fromlen, char);
+ cur += fromlen;
+ len -= fromlen;
+ }
+ memset(cur, datumtype == 'A' ? ' ' : '\0', len);
+ cur += len;
+ SvTAINT(cat);
+ break;
+ }
+ case 'B':
+ case 'b': {
+ const char *str, *end;
+ SSize_t l, field_len;
+ U8 bits;
+ bool utf8_source;
+ U32 utf8_flags;
+
+ fromstr = NEXTFROM;
+ str = SvPV_const(fromstr, fromlen);
+ end = str + fromlen;
+ if (DO_UTF8(fromstr)) {
+ utf8_source = TRUE;
+ utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
+ } else {
+ utf8_source = FALSE;
+ utf8_flags = 0; /* Unused, but keep compilers happy */
+ }
+ if (howlen == e_star) len = fromlen;
+ field_len = (len+7)/8;
+ GROWING(utf8, cat, start, cur, field_len);
+ if (len > (SSize_t)fromlen) len = fromlen;
+ bits = 0;
+ l = 0;
+ if (datumtype == 'B')
+ while (l++ < len) {
+ if (utf8_source) {
+ UV val = 0;
+ NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
+ bits |= val & 1;
+ } else bits |= *str++ & 1;
+ if (l & 7) bits <<= 1;
+ else {
+ PUSH_BYTE(utf8, cur, bits);
+ bits = 0;
+ }
+ }
+ else
+ /* datumtype == 'b' */
+ while (l++ < len) {
+ if (utf8_source) {
+ UV val = 0;
+ NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
+ if (val & 1) bits |= 0x80;
+ } else if (*str++ & 1)
+ bits |= 0x80;
+ if (l & 7) bits >>= 1;
+ else {
+ PUSH_BYTE(utf8, cur, bits);
+ bits = 0;
+ }
+ }
+ l--;
+ if (l & 7) {
+ if (datumtype == 'B')
+ bits <<= 7 - (l & 7);
+ else
+ bits >>= 7 - (l & 7);
+ PUSH_BYTE(utf8, cur, bits);
+ l += 7;
+ }
+ /* Determine how many chars are left in the requested field */
+ l /= 8;
+ if (howlen == e_star) field_len = 0;
+ else field_len -= l;
+ Zero(cur, field_len, char);
+ cur += field_len;
+ break;
+ }
+ case 'H':
+ case 'h': {
+ const char *str, *end;
+ SSize_t l, field_len;
+ U8 bits;
+ bool utf8_source;
+ U32 utf8_flags;
+
+ fromstr = NEXTFROM;
+ str = SvPV_const(fromstr, fromlen);
+ end = str + fromlen;
+ if (DO_UTF8(fromstr)) {
+ utf8_source = TRUE;
+ utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
+ } else {
+ utf8_source = FALSE;
+ utf8_flags = 0; /* Unused, but keep compilers happy */
+ }
+ if (howlen == e_star) len = fromlen;
+ field_len = (len+1)/2;
+ GROWING(utf8, cat, start, cur, field_len);
+ if (!utf8_source && len > (SSize_t)fromlen) len = fromlen;
+ bits = 0;
+ l = 0;
+ if (datumtype == 'H')
+ while (l++ < len) {
+ if (utf8_source) {
+ UV val = 0;
+ NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
+ if (val < 256 && isALPHA(val))
+ bits |= (val + 9) & 0xf;
+ else
+ bits |= val & 0xf;
+ } else if (isALPHA(*str))
+ bits |= (*str++ + 9) & 0xf;
+ else
+ bits |= *str++ & 0xf;
+ if (l & 1) bits <<= 4;
+ else {
+ PUSH_BYTE(utf8, cur, bits);
+ bits = 0;
+ }
+ }
+ else
+ while (l++ < len) {
+ if (utf8_source) {
+ UV val = 0;
+ NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
+ if (val < 256 && isALPHA(val))
+ bits |= ((val + 9) & 0xf) << 4;
+ else
+ bits |= (val & 0xf) << 4;
+ } else if (isALPHA(*str))
+ bits |= ((*str++ + 9) & 0xf) << 4;
+ else
+ bits |= (*str++ & 0xf) << 4;
+ if (l & 1) bits >>= 4;
+ else {
+ PUSH_BYTE(utf8, cur, bits);
+ bits = 0;
+ }
+ }
+ l--;
+ if (l & 1) {
+ PUSH_BYTE(utf8, cur, bits);
+ l++;
+ }
+ /* Determine how many chars are left in the requested field */
+ l /= 2;
+ if (howlen == e_star) field_len = 0;
+ else field_len -= l;
+ Zero(cur, field_len, char);
+ cur += field_len;
+ break;
+ }
+ case 'c':
+ while (len-- > 0) {
+ IV aiv;
+ fromstr = NEXTFROM;