- if (squash) {
- UV puv = 0xfeedface;
- while (s < send) {
- UV uv = swash_fetch(rv, s, TRUE);
-
- if (d > dend) {
- const STRLEN clen = d - dstart;
- const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES;
- if (!grows)
- Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__);
- Renew(dstart, nlen + UTF8_MAXBYTES, U8);
- d = dstart + clen;
- dend = dstart + nlen;
- }
- if (uv < none) {
- matches++;
- s += UTF8SKIP(s);
- if (uv != puv) {
- d = uvchr_to_utf8(d, uv);
- puv = uv;
- }
- continue;
- }
- else if (uv == none) { /* "none" is unmapped character */
- const int i = UTF8SKIP(s);
- Move(s, d, i, U8);
- d += i;
- s += i;
- puv = 0xfeedface;
- continue;
- }
- else if (uv == extra && !del) {
- matches++;
- if (havefinal) {
- s += UTF8SKIP(s);
- if (puv != final) {
- d = uvchr_to_utf8(d, final);
- puv = final;
- }
- }
- else {
- STRLEN len;
- uv = utf8n_to_uvchr(s, send - s, &len, UTF8_ALLOW_DEFAULT);
- if (uv != puv) {
- Move(s, d, len, U8);
- d += len;
- puv = uv;
- }
- s += len;
- }
- continue;
- }
- matches++; /* "none+1" is delete character */
- s += UTF8SKIP(s);
- }
- }
- else {
- while (s < send) {
- const UV uv = swash_fetch(rv, s, TRUE);
- if (d > dend) {
- const STRLEN clen = d - dstart;
- const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES;
- if (!grows)
- Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__);
- Renew(dstart, nlen + UTF8_MAXBYTES, U8);
- d = dstart + clen;
- dend = dstart + nlen;
- }
- if (uv < none) {
- matches++;
- s += UTF8SKIP(s);
- d = uvchr_to_utf8(d, uv);
- continue;
- }
- else if (uv == none) { /* "none" is unmapped character */
- const int i = UTF8SKIP(s);
- Move(s, d, i, U8);
- d += i;
- s += i;
- continue;
- }
- else if (uv == extra && !del) {
- matches++;
- s += UTF8SKIP(s);
- d = uvchr_to_utf8(d, final);
- continue;
- }
- matches++; /* "none+1" is delete character */
- s += UTF8SKIP(s);
+ restart:
+
+ /* Do the actual transliteration */
+ while (s < send) {
+ UV from;
+ UV to;
+ SSize_t i;
+ STRLEN s_len;
+
+ /* Get the code point of the next character in the string */
+ if (! SvUTF8(sv) || UTF8_IS_INVARIANT(*s)) {
+ from = *s;
+ s_len = 1;
+ }
+ else {
+ from = utf8_to_uvchr_buf(s, send, &s_len);
+ if (from == 0 && *s != '\0') {
+ _force_out_malformed_utf8_message(s, send, 0, /*die*/TRUE);
+ }
+ }
+
+ /* Look the code point up in the data structure for this tr/// to get
+ * what it maps to */
+ i = _invlist_search(from_invlist, from);
+ assert(i >= 0);
+
+ to = map[i];
+
+ if (to == (UV) TR_UNLISTED) { /* Just copy the unreplaced character */
+ if (UVCHR_IS_INVARIANT(from) || ! out_is_utf8) {
+ *d++ = (U8) from;
+ }
+ else if (SvUTF8(sv)) {
+ Move(s, d, s_len, U8);
+ d += s_len;
+ }
+ else { /* Convert to UTF-8 */
+ append_utf8_from_native_byte(*s, &d);
+ }
+
+ previous_map = to;
+ s += s_len;
+ continue;