This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Integrate mainline
[perl5.git] / ext / Encode / Encode.xs
index fce3ca4..992fbfe 100644 (file)
+/*
+ $Id: Encode.xs,v 1.31 2002/04/20 23:43:47 dankogai Exp dankogai $
+ */
+
+#define PERL_NO_GET_CONTEXT
 #include "EXTERN.h"
 #include "perl.h"
 #include "XSUB.h"
 #define U8 U8
 #include "encode.h"
-#include "iso8859.h"
-#include "EBCDIC.h"
-#include "Symbols.h"
 
-#define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {   \
+/* set 1 or more to profile.  t/encoding.t dumps core because of
+   Perl_warner and PerlIO don't work well */
+#define ENCODE_XS_PROFILE 0
+
+/* set 0 to disable floating point to calculate buffer size for
+   encode_method().  1 is recommended. 2 restores NI-S original */
+#define ENCODE_XS_USEFP   1
+
+#define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX;   \
                          Perl_croak(aTHX_ "panic_unimplemented"); \
                         return (y)0; /* fool picky compilers */ \
                          }
-UNIMPLEMENTED(_encoded_utf8_to_bytes, I32)
-UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
-
-#ifdef USE_PERLIO
-/* Define an encoding "layer" in the perliol.h sense.
-   The layer defined here "inherits" in an object-oriented sense from the
-   "perlio" layer with its PerlIOBuf_* "methods".
-   The implementation is particularly efficient as until Encode settles down
-   there is no point in tryint to tune it.
-
-   The layer works by overloading the "fill" and "flush" methods.
-
-   "fill" calls "SUPER::fill" in perl terms, then calls the encode OO perl API
-   to convert the encoded data to UTF-8 form, then copies it back to the
-   buffer. The "base class's" read methods then see the UTF-8 data.
-
-   "flush" transforms the UTF-8 data deposited by the "base class's write
-   method in the buffer back into the encoded form using the encode OO perl API,
-   then copies data back into the buffer and calls "SUPER::flush.
-
-   Note that "flush" is _also_ called for read mode - we still do the (back)-translate
-   so that the the base class's "flush" sees the correct number of encoded chars
-   for positioning the seek pointer. (This double translation is the worst performance
-   issue - particularly with all-perl encode engine.)
-
-*/
+/**/
+UNIMPLEMENTED(_encoded_utf8_to_bytes, I32);
+UNIMPLEMENTED(_encoded_bytes_to_utf8, I32);
 
-
-#include "perliol.h"
-
-typedef struct
-{
- PerlIOBuf     base;         /* PerlIOBuf stuff */
- SV *          bufsv;
- SV *          enc;
-} PerlIOEncode;
-
-
-IV
-PerlIOEncode_pushed(PerlIO *f, const char *mode,const char *arg,STRLEN len)
+void
+Encode_XSEncoding(pTHX_ encode_t * enc)
 {
- PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
- dTHX;
- dSP;
- IV code;
- code = PerlIOBuf_pushed(f,mode,Nullch,0);
- ENTER;
- SAVETMPS;
- PUSHMARK(sp);
- XPUSHs(sv_2mortal(newSVpv("Encode",0)));
- XPUSHs(sv_2mortal(newSVpvn(arg,len)));
- PUTBACK;
- if (perl_call_method("getEncoding",G_SCALAR) != 1)
-  return -1;
- SPAGAIN;
- e->enc = POPs;
- PUTBACK;
- if (!SvROK(e->enc))
-  return -1;
- SvREFCNT_inc(e->enc);
- FREETMPS;
- LEAVE;
- PerlIOBase(f)->flags |= PERLIO_F_UTF8;
- return code;
+    dSP;
+    HV *stash = gv_stashpv("Encode::XS", TRUE);
+    SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))), stash);
+    int i = 0;
+    PUSHMARK(sp);
+    XPUSHs(sv);
+    while (enc->name[i]) {
+       const char *name = enc->name[i++];
+       XPUSHs(sv_2mortal(newSVpvn(name, strlen(name))));
+    }
+    PUTBACK;
+    call_pv("Encode::define_encoding", G_DISCARD);
+    SvREFCNT_dec(sv);
 }
 
-IV
-PerlIOEncode_popped(PerlIO *f)
+void
+call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig)
 {
- PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
- dTHX;
- if (e->enc)
-  {
-   SvREFCNT_dec(e->enc);
-   e->enc = Nullsv;
-  }
- if (e->bufsv)
-  {
-   SvREFCNT_dec(e->bufsv);
-   e->bufsv = Nullsv;
-  }
- return 0;
+    /* Exists for breakpointing */
 }
 
-STDCHAR *
-PerlIOEncode_get_base(PerlIO *f)
-{
- PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
- dTHX;
- if (!e->base.bufsiz)
-  e->base.bufsiz = 1024;
- if (!e->bufsv)
-  {
-   e->bufsv = newSV(e->base.bufsiz);
-   sv_setpvn(e->bufsv,"",0);
-  }
- e->base.buf = (STDCHAR *)SvPVX(e->bufsv);
- if (!e->base.ptr)
-  e->base.ptr = e->base.buf;
- if (!e->base.end)
-  e->base.end = e->base.buf;
- if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv))
-  {
-   Perl_warn(aTHX_ " ptr %p(%p)%p",
-             e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv));
-   abort();
-  }
- if (SvLEN(e->bufsv) < e->base.bufsiz)
-  {
-   SSize_t poff = e->base.ptr - e->base.buf;
-   SSize_t eoff = e->base.end - e->base.buf;
-   e->base.buf  = (STDCHAR *)SvGROW(e->bufsv,e->base.bufsiz);
-   e->base.ptr  = e->base.buf + poff;
-   e->base.end  = e->base.buf + eoff;
-  }
- if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv))
-  {
-   Perl_warn(aTHX_ " ptr %p(%p)%p",
-             e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv));
-   abort();
-  }
- return e->base.buf;
-}
 
-IV
-PerlIOEncode_fill(PerlIO *f)
+static SV *
+encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src,
+             int check)
 {
- PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
- dTHX;
- dSP;
- IV code;
- code = PerlIOBuf_fill(f);
- if (code == 0)
-  {
-   SV *uni;
-   STRLEN len;
-   char *s;
-   /* Set SV that is the buffer to be buf..ptr */
-   SvCUR_set(e->bufsv, e->base.end - e->base.buf);
-   SvUTF8_off(e->bufsv);
-   ENTER;
-   SAVETMPS;
-   PUSHMARK(sp);
-   XPUSHs(e->enc);
-   XPUSHs(e->bufsv);
-   XPUSHs(&PL_sv_yes);
-   PUTBACK;
-   if (perl_call_method("toUnicode",G_SCALAR) != 1)
-    code = -1;
-   SPAGAIN;
-   uni = POPs;
-   PUTBACK;
-   /* Now get translated string (forced to UTF-8) and copy back to buffer
-      don't use sv_setsv as that may "steal" PV from returned temp
-      and so free() our known-large-enough buffer.
-      sv_setpvn() should do but let us do it long hand.
-    */
-   s = SvPVutf8(uni,len);
-   if (s != SvPVX(e->bufsv))
-    {
-     e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len);
-     Move(s,e->base.buf,len,char);
-     SvCUR_set(e->bufsv,len);
+    STRLEN slen;
+    U8 *s = (U8 *) SvPV(src, slen);
+    STRLEN tlen  = slen;
+    STRLEN ddone = 0;
+    STRLEN sdone = 0;
+
+    /* We allocate slen+1.
+       PerlIO dumps core if this value is smaller than this. */
+    SV *dst = sv_2mortal(newSV(slen+1));
+    U8 *d = (U8 *)SvPVX(dst);
+    STRLEN dlen = SvLEN(dst)-1;
+    int code;
+
+    if (!slen){
+       SvCUR_set(dst, 0);
+       SvPOK_only(dst);
+       goto ENCODE_END;
     }
-   SvUTF8_on(e->bufsv);
-   e->base.end    = e->base.buf+len;
-   e->base.ptr    = e->base.buf;
-   FREETMPS;
-   LEAVE;
-  }
- return code;
-}
 
-IV
-PerlIOEncode_flush(PerlIO *f)
-{
- PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
- IV code = 0;
- dTHX;
- if (e->bufsv && (PerlIOBase(f)->flags & (PERLIO_F_RDBUF|PERLIO_F_WRBUF)))
-  {
-   dSP;
-   SV *str;
-   char *s;
-   STRLEN len;
-   SSize_t left = 0;
-   if (PerlIOBase(f)->flags & PERLIO_F_RDBUF)
+    while (code = do_encode(dir, s, &slen, d, dlen, &dlen, !check))
     {
-     /* This is really just a flag to see if we took all the data, if
-        we did PerlIOBase_flush avoids a seek to lower layer.
-        Need to revisit if we start getting clever with unreads or seeks-in-buffer
-      */
-     left = e->base.end - e->base.ptr;
+       SvCUR_set(dst, dlen+ddone);
+       SvPOK_only(dst);
+       
+       if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL){
+           break;
+       }
+       switch (code) {
+       case ENCODE_NOSPACE:
+       {       
+           STRLEN more = 0; /* make sure you initialize! */
+           STRLEN sleft;
+           sdone += slen;
+           ddone += dlen;
+           sleft = tlen - sdone;
+#if ENCODE_XS_PROFILE >= 2
+           Perl_warn(aTHX_
+                     "more=%d, sdone=%d, sleft=%d, SvLEN(dst)=%d\n",
+                     more, sdone, sleft, SvLEN(dst));
+#endif
+           if (sdone != 0) { /* has src ever been processed ? */
+#if   ENCODE_XS_USEFP == 2
+               more = (1.0*tlen*SvLEN(dst)+sdone-1)/sdone
+                   - SvLEN(dst);
+#elif ENCODE_XS_USEFP
+               more = (1.0*SvLEN(dst)+1)/sdone * sleft;
+#else
+               /* safe until SvLEN(dst) == MAX_INT/16 */
+               more = (16*SvLEN(dst)+1)/sdone/16 * sleft;
+#endif
+           }
+           more += UTF8_MAXLEN; /* insurance policy */
+           d = (U8 *) SvGROW(dst, SvLEN(dst) + more);
+           /* dst need to grow need MORE bytes! */
+           if (ddone >= SvLEN(dst)) {
+               Perl_croak(aTHX_ "Destination couldn't be grown.");
+           }
+           dlen = SvLEN(dst)-ddone-1;
+           d   += ddone;
+           s   += slen;
+           slen = tlen-sdone;
+           continue;
+       }
+       case ENCODE_NOREP:
+           /* encoding */      
+           if (dir == enc->f_utf8) {
+               STRLEN clen;
+               UV ch =
+                   utf8n_to_uvuni(s+slen, (SvCUR(src)-slen),
+                                  &clen, UTF8_ALLOW_ANY|UTF8_CHECK_ONLY);
+               if (check & ENCODE_DIE_ON_ERR) {
+                   Perl_croak(
+                       aTHX_ "\"\\N{U+%" UVxf "}\" does not map to %s, %d",
+                       ch, enc->name[0], __LINE__);
+               }else{
+                   if (check & ENCODE_RETURN_ON_ERR){
+                       if (check & ENCODE_WARN_ON_ERR){
+                           Perl_warner(
+                               aTHX_ packWARN(WARN_UTF8),
+                               "\"\\N{U+%" UVxf "}\" does not map to %s",
+                               ch,enc->name[0]);
+                       }
+                               goto ENCODE_SET_SRC;
+                   }else if (check & ENCODE_PERLQQ){
+                       SV* perlqq =
+                           sv_2mortal(newSVpvf("\\x{%04x}", ch));
+                       sdone += slen + clen;
+                       ddone += dlen + SvCUR(perlqq);
+                       sv_catsv(dst, perlqq);
+                   } else {
+                       /* fallback char */
+                       sdone += slen + clen;
+                       ddone += dlen + enc->replen;
+                       sv_catpvn(dst, (char*)enc->rep, enc->replen);
+                   }                   
+               }
+           }
+           /* decoding */
+           else {
+               if (check & ENCODE_DIE_ON_ERR){
+                   Perl_croak(
+                       aTHX_ "%s \"\\x%02X\" does not map to Unicode (%d)",
+                       enc->name[0], (U8) s[slen], code);
+               }else{
+                   if (check & ENCODE_RETURN_ON_ERR){
+                       if (check & ENCODE_WARN_ON_ERR){
+                           Perl_warner(
+                               aTHX_ packWARN(WARN_UTF8),
+                               "%s \"\\x%02X\" does not map to Unicode (%d)",
+                               enc->name[0], (U8) s[slen], code);
+                       }
+                       goto ENCODE_SET_SRC;
+                   }else if (check & ENCODE_PERLQQ){
+                       SV* perlqq =
+                           sv_2mortal(newSVpvf("\\x%02X", s[slen]));
+                       sdone += slen + 1;
+                       ddone += dlen + SvCUR(perlqq);
+                       sv_catsv(dst, perlqq);
+                   } else {
+                       sdone += slen + 1;
+                       ddone += dlen + strlen(FBCHAR_UTF8);
+                       sv_catpv(dst, FBCHAR_UTF8);
+                   }
+               }
+           }
+           /* settle variables when fallback */
+           d    = (U8 *)SvEND(dst);
+            dlen = SvLEN(dst) - ddone - 1;
+           s    = (U8*)SvPVX(src) + sdone;
+           slen = tlen - sdone;
+           break;
+
+       default:
+           Perl_croak(aTHX_ "Unexpected code %d converting %s %s",
+                      code, (dir == enc->f_utf8) ? "to" : "from",
+                      enc->name[0]);
+           return &PL_sv_undef;
+       }
     }
-   ENTER;
-   SAVETMPS;
-   PUSHMARK(sp);
-   XPUSHs(e->enc);
-   SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
-   SvUTF8_on(e->bufsv);
-   XPUSHs(e->bufsv);
-   XPUSHs(&PL_sv_yes);
-   PUTBACK;
-   if (perl_call_method("fromUnicode",G_SCALAR) != 1)
-    code = -1;
-   SPAGAIN;
-   str = POPs;
-   PUTBACK;
-   s = SvPV(str,len);
-   if (s != SvPVX(e->bufsv))
-    {
-     e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len);
-     Move(s,e->base.buf,len,char);
-     SvCUR_set(e->bufsv,len);
+ ENCODE_SET_SRC:
+    if (check && !(check & ENCODE_LEAVE_SRC)){
+       sdone = SvCUR(src) - (slen+sdone);
+       if (sdone) {
+           sv_setpvn(src, (char*)s+slen, sdone);
+       }
+       SvCUR_set(src, sdone);
+    }
+    /* warn("check = 0x%X, code = 0x%d\n", check, code); */
+    if (code && !(check & ENCODE_RETURN_ON_ERR)) {
+       return &PL_sv_undef;
     }
-   SvUTF8_off(e->bufsv);
-   e->base.ptr = e->base.buf+len;
-   /* restore end != ptr as inequality is used by PerlIOBuf_flush in read case */
-   e->base.end = e->base.ptr + left;
-   FREETMPS;
-   LEAVE;
-   if (PerlIOBuf_flush(f) != 0)
-    code = -1;
-  }
- return code;
-}
-
-IV
-PerlIOEncode_close(PerlIO *f)
-{
- PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode);
- IV code = PerlIOBase_close(f);
- dTHX;
- if (e->bufsv)
-  {
-   SvREFCNT_dec(e->bufsv);
-   e->bufsv = Nullsv;
-  }
- e->base.buf = NULL;
- e->base.ptr = NULL;
- e->base.end = NULL;
- PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF|PERLIO_F_WRBUF);
- return code;
-}
 
-Off_t
-PerlIOEncode_tell(PerlIO *f)
-{
- PerlIOBuf *b = PerlIOSelf(f,PerlIOBuf);
- /* Unfortunately the only way to get a postion is to back-translate,
-    the UTF8-bytes we have buf..ptr and adjust accordingly.
-    But we will try and save any unread data in case stream
-    is un-seekable.
-  */
- if ((PerlIOBase(f)->flags & PERLIO_F_RDBUF) && b->ptr < b->end)
-  {
-   Size_t count = b->end - b->ptr;
-   PerlIO_push(f,&PerlIO_pending,"r",Nullch,0);
-   /* Save what we have left to read */
-   PerlIOSelf(f,PerlIOBuf)->bufsiz = count;
-   PerlIO_unread(f,b->ptr,count);
-   /* There isn't any unread data - we just saved it - so avoid the lower seek */
-   b->end = b->ptr;
-   /* Flush ourselves - now one layer down,
-      this does the back translate and adjusts position
-    */
-   PerlIO_flush(PerlIONext(f));
-   /* Set position of the saved data */
-   PerlIOSelf(f,PerlIOBuf)->posn = b->posn;
-  }
- else
-  {
-   PerlIO_flush(f);
-  }
- return b->posn;
-}
+    SvCUR_set(dst, dlen+ddone);
+    SvPOK_only(dst);
 
-PerlIO_funcs PerlIO_encode = {
- "encoding",
- sizeof(PerlIOEncode),
- PERLIO_K_BUFFERED,
- PerlIOBase_fileno,
- PerlIOBuf_fdopen,
- PerlIOBuf_open,
- PerlIOBuf_reopen,
- PerlIOEncode_pushed,
- PerlIOEncode_popped,
- PerlIOBuf_read,
- PerlIOBuf_unread,
- PerlIOBuf_write,
- PerlIOBuf_seek,
- PerlIOEncode_tell,
- PerlIOEncode_close,
- PerlIOEncode_flush,
- PerlIOEncode_fill,
- PerlIOBase_eof,
- PerlIOBase_error,
- PerlIOBase_clearerr,
- PerlIOBuf_setlinebuf,
- PerlIOEncode_get_base,
- PerlIOBuf_bufsiz,
- PerlIOBuf_get_ptr,
- PerlIOBuf_get_cnt,
- PerlIOBuf_set_ptrcnt,
-};
+#if ENCODE_XS_PROFILE
+    if (SvCUR(dst) > SvCUR(src)){
+       Perl_warn(aTHX_
+                 "SvLEN(dst)=%d, SvCUR(dst)=%d. %d bytes unused(%f %%)\n",
+                 SvLEN(dst), SvCUR(dst), SvLEN(dst) - SvCUR(dst),
+                 (SvLEN(dst) - SvCUR(dst))*1.0/SvLEN(dst)*100.0);
+    }
 #endif
 
-void
-Encode_Define(pTHX_ encode_t *enc)
-{
- HV *hash  = get_hv("Encode::encoding",GV_ADD|GV_ADDMULTI);
- HV *stash = gv_stashpv("Encode::XS", TRUE);
- SV *sv    = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash);
- hv_store(hash,enc->name,strlen(enc->name),sv,0);
+ ENCODE_END:
+    *SvEND(dst) = '\0';
+    return dst;
 }
 
-void call_failure (SV *routine, U8* done, U8* dest, U8* orig) {}
+MODULE = Encode                PACKAGE = Encode::XS    PREFIX = Method_
 
-static SV *
-encode_method(pTHX_ encode_t *enc, encpage_t *dir, SV *src, int check)
-{
- STRLEN slen;
- U8 *s = (U8 *) SvPV(src,slen);
- SV *dst = sv_2mortal(newSV(2*slen+1));
- if (slen)
-  {
-   U8 *d = (U8 *) SvGROW(dst, 2*slen+1);
-   STRLEN dlen = SvLEN(dst);
-   int code;
-   while ((code = do_encode(dir,s,&slen,d,dlen,&dlen,!check)))
-    {
-     SvCUR_set(dst,dlen);
-     SvPOK_on(dst);
-
-     if (code == ENCODE_FALLBACK)
-      break;
-
-     switch(code)
-      {
-       case ENCODE_NOSPACE:
-        {
-         STRLEN need = (slen) ? (SvLEN(dst)*SvCUR(src)/slen) : (dlen + UTF8_MAXLEN);
-         if (need <= SvLEN(dst))
-          need += UTF8_MAXLEN;
-         d = (U8 *) SvGROW(dst, need);
-         dlen = SvLEN(dst);
-         slen = SvCUR(src);
-         break;
-        }
-
-       case ENCODE_NOREP:
-        if (dir == enc->f_utf8)
-         {
-          if (!check && ckWARN_d(WARN_UTF8))
-           {
-            STRLEN clen;
-            UV ch = utf8_to_uv(s+slen,(SvCUR(src)-slen),&clen,0);
-            Perl_warner(aTHX_ WARN_UTF8, "\"\\x{%"UVxf"}\" does not map to %s", ch, enc->name);
-            /* FIXME: Skip over the character, copy in replacement and continue
-             * but that is messy so for now just fail.
-             */
-            return &PL_sv_undef;
-           }
-          else
-           {
-            return &PL_sv_undef;
-           }
-         }
-        else
-         {
-          /* UTF-8 is supposed to be "Universal" so should not happen */
-          Perl_croak(aTHX_ "%s '%.*s' does not map to UTF-8",
-                 enc->name, (int)(SvCUR(src)-slen),s+slen);
-         }
-        break;
-
-       case ENCODE_PARTIAL:
-         if (!check && ckWARN_d(WARN_UTF8))
-          {
-           Perl_warner(aTHX_ WARN_UTF8, "Partial %s character",
-                       (dir == enc->f_utf8) ? "UTF-8" : enc->name);
-          }
-         return &PL_sv_undef;
-
-       default:
-        Perl_croak(aTHX_ "Unexpected code %d converting %s %s",
-                 code, (dir == enc->f_utf8) ? "to" : "from",enc->name);
-        return &PL_sv_undef;
-      }
-    }
-   SvCUR_set(dst,dlen);
-   SvPOK_on(dst);
-   if (check)
-    {
-     if (slen < SvCUR(src))
-      {
-       Move(s+slen,s,SvCUR(src)-slen,U8);
-      }
-     SvCUR_set(src,SvCUR(src)-slen);
-    }
-  }
- return dst;
-}
-
-MODULE = Encode                PACKAGE = Encode        PREFIX = sv_
+PROTOTYPES: ENABLE
 
 void
-valid_utf8(sv)
-SV *   sv
+Method_name(obj)
+SV *   obj
 CODE:
- {
-  STRLEN len;
-  char *s = SvPV(sv,len);
-  if (!SvUTF8(sv) || is_utf8_string((U8*)s,len))
-   XSRETURN_YES;
-  else
-   XSRETURN_NO;
- }
-
-void
-sv_utf8_encode(sv)
-SV *   sv
-
-bool
-sv_utf8_decode(sv)
-SV *   sv
-
-void
-sv_utf8_upgrade(sv)
-SV *   sv
-
-bool
-sv_utf8_downgrade(sv,failok=0)
-SV *   sv
-bool   failok
-
-MODULE = Encode                PACKAGE = Encode::XS    PREFIX = Encode_
-
-PROTOTYPES: ENABLE
+{
+    encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
+    ST(0) = sv_2mortal(newSVpvn(enc->name[0],strlen(enc->name[0])));
+    XSRETURN(1);
+}
 
 void
-Encode_toUnicode(obj,src,check = 0)
+Method_decode(obj,src,check = 0)
 SV *   obj
 SV *   src
 int    check
 CODE:
- {
-  encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
-  ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check);
-  SvUTF8_on(ST(0));
-  XSRETURN(1);
- }
+{
+    encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
+    ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check);
+    SvUTF8_on(ST(0));
+    XSRETURN(1);
+}
 
 void
-Encode_fromUnicode(obj,src,check = 0)
+Method_encode(obj,src,check = 0)
 SV *   obj
 SV *   src
 int    check
 CODE:
- {
-  encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
-  sv_utf8_upgrade(src);
-  ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check);
-  XSRETURN(1);
- }
+{
+    encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
+    sv_utf8_upgrade(src);
+    ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check);
+    XSRETURN(1);
+}
 
 MODULE = Encode         PACKAGE = Encode
 
@@ -483,234 +269,224 @@ PROTOTYPES: ENABLE
 
 I32
 _bytes_to_utf8(sv, ...)
-        SV *    sv
-      CODE:
-        {
-          SV * encoding = items == 2 ? ST(1) : Nullsv;
-
-          if (encoding)
-            RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
-          else {
-            STRLEN len;
-            U8*    s = (U8*)SvPV(sv, len);
-            U8*    converted;
-
-            converted = bytes_to_utf8(s, &len); /* This allocs */
-            sv_setpvn(sv, (char *)converted, len);
-            SvUTF8_on(sv); /* XXX Should we? */
-            Safefree(converted);                /* ... so free it */
-            RETVAL = len;
-          }
-        }
-      OUTPUT:
-        RETVAL
+SV *    sv
+CODE:
+{
+    SV * encoding = items == 2 ? ST(1) : Nullsv;
+
+    if (encoding)
+    RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
+    else {
+       STRLEN len;
+       U8*    s = (U8*)SvPV(sv, len);
+       U8*    converted;
+
+       converted = bytes_to_utf8(s, &len); /* This allocs */
+       sv_setpvn(sv, (char *)converted, len);
+       SvUTF8_on(sv); /* XXX Should we? */
+       Safefree(converted);                /* ... so free it */
+       RETVAL = len;
+    }
+}
+OUTPUT:
+    RETVAL
 
 I32
 _utf8_to_bytes(sv, ...)
-        SV *    sv
-      CODE:
-        {
-          SV * to    = items > 1 ? ST(1) : Nullsv;
-          SV * check = items > 2 ? ST(2) : Nullsv;
-
-          if (to)
-            RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to));
-          else {
-            STRLEN len;
-            U8 *s = (U8*)SvPV(sv, len);
-
-            if (SvTRUE(check)) {
-              /* Must do things the slow way */
-              U8 *dest;
-              U8 *src  = (U8*)savepv((char *)s); /* We need a copy to pass to check() */
-              U8 *send = s + len;
-
-              New(83, dest, len, U8); /* I think */
-
-              while (s < send) {
-                if (*s < 0x80)
-                  *dest++ = *s++;
-                else {
-                  STRLEN ulen;
-                 UV uv = *s++;
-
-                  /* Have to do it all ourselves because of error routine,
-                    aargh. */
-                 if (!(uv & 0x40))
-                   goto failure;
-                 if      (!(uv & 0x20)) { ulen = 2;  uv &= 0x1f; }
-                 else if (!(uv & 0x10)) { ulen = 3;  uv &= 0x0f; }
-                 else if (!(uv & 0x08)) { ulen = 4;  uv &= 0x07; }
-                 else if (!(uv & 0x04)) { ulen = 5;  uv &= 0x03; }
-                 else if (!(uv & 0x02)) { ulen = 6;  uv &= 0x01; }
-                 else if (!(uv & 0x01)) { ulen = 7;  uv = 0; }
-                 else                   { ulen = 13; uv = 0; }
-               
-                 /* Note change to utf8.c variable naming, for variety */
-                 while (ulen--) {
-                   if ((*s & 0xc0) != 0x80)
-                     goto failure;
+SV *    sv
+CODE:
+{
+    SV * to    = items > 1 ? ST(1) : Nullsv;
+    SV * check = items > 2 ? ST(2) : Nullsv;
+
+    if (to) {
+       RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to));
+    } else {
+       STRLEN len;
+       U8 *s = (U8*)SvPV(sv, len);
+
+       RETVAL = 0;
+       if (SvTRUE(check)) {
+           /* Must do things the slow way */
+           U8 *dest;
+            /* We need a copy to pass to check() */
+           U8 *src  = (U8*)savepv((char *)s);
+           U8 *send = s + len;
+
+           New(83, dest, len, U8); /* I think */
+
+           while (s < send) {
+                if (*s < 0x80){
+                   *dest++ = *s++;
+                } else {
+                   STRLEN ulen;
+                   UV uv = *s++;
+
+                   /* Have to do it all ourselves because of error routine,
+                      aargh. */
+                   if (!(uv & 0x40)){ goto failure; }
+                   if      (!(uv & 0x20)) { ulen = 2;  uv &= 0x1f; }
+                   else if (!(uv & 0x10)) { ulen = 3;  uv &= 0x0f; }
+                   else if (!(uv & 0x08)) { ulen = 4;  uv &= 0x07; }
+                   else if (!(uv & 0x04)) { ulen = 5;  uv &= 0x03; }
+                   else if (!(uv & 0x02)) { ulen = 6;  uv &= 0x01; }
+                   else if (!(uv & 0x01)) { ulen = 7;  uv = 0; }
+                   else                   { ulen = 13; uv = 0; }
                
-                   else
-                     uv = (uv << 6) | (*s++ & 0x3f);
+                   /* Note change to utf8.c variable naming, for variety */
+                   while (ulen--) {
+                       if ((*s & 0xc0) != 0x80){
+                           goto failure;
+                       } else {
+                           uv = (uv << 6) | (*s++ & 0x3f);
+                       }
                  }
                  if (uv > 256) {
                  failure:
-                   call_failure(check, s, dest, src);
-                   /* Now what happens? */
+                     call_failure(check, s, dest, src);
+                     /* Now what happens? */
                  }
                  *dest++ = (U8)uv;
-               }
-               }
-           } else
-             RETVAL = (utf8_to_bytes(s, &len) ? len : 0);
-         }
+               }
+           }
+       } else {
+           RETVAL = (utf8_to_bytes(s, &len) ? len : 0);
        }
-      OUTPUT:
-       RETVAL
+    }
+}
+OUTPUT:
+    RETVAL
 
-SV *
-_chars_to_utf8(sv, from, ...)
-       SV *    sv
-       SV *    from
-      CODE:
-       {
-         SV * check = items == 3 ? ST(2) : Nullsv;
-         RETVAL = &PL_sv_undef;
-       }
-      OUTPUT:
-       RETVAL
+bool
+is_utf8(sv, check = 0)
+SV *   sv
+int    check
+CODE:
+{
+    if (SvGMAGICAL(sv)) /* it could be $1, for example */
+       sv = newSVsv(sv); /* GMAGIG will be done */
+    if (SvPOK(sv)) {
+       RETVAL = SvUTF8(sv) ? TRUE : FALSE;
+       if (RETVAL &&
+           check  &&
+           !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
+           RETVAL = FALSE;
+    } else {
+       RETVAL = FALSE;
+    }
+    if (sv != ST(0))
+       SvREFCNT_dec(sv); /* it was a temp copy */
+}
+OUTPUT:
+    RETVAL
 
 SV *
-_utf8_to_chars(sv, to, ...)
-       SV *    sv
-       SV *    to
-      CODE:
-       {
-         SV * check = items == 3 ? ST(2) : Nullsv;
-         RETVAL = &PL_sv_undef;
-       }
-      OUTPUT:
-       RETVAL
+_utf8_on(sv)
+SV *   sv
+CODE:
+{
+    if (SvPOK(sv)) {
+       SV *rsv = newSViv(SvUTF8(sv));
+       RETVAL = rsv;
+       SvUTF8_on(sv);
+    } else {
+       RETVAL = &PL_sv_undef;
+    }
+}
+OUTPUT:
+    RETVAL
 
 SV *
-_utf8_to_chars_check(sv, ...)
-       SV *    sv
-      CODE:
-       {
-         SV * check = items == 2 ? ST(1) : Nullsv;
-         RETVAL = &PL_sv_undef;
-       }
-      OUTPUT:
-       RETVAL
+_utf8_off(sv)
+SV *   sv
+CODE:
+{
+    if (SvPOK(sv)) {
+       SV *rsv = newSViv(SvUTF8(sv));
+       RETVAL = rsv;
+       SvUTF8_off(sv);
+    } else {
+       RETVAL = &PL_sv_undef;
+    }
+}
+OUTPUT:
+    RETVAL
 
-SV *
-_bytes_to_chars(sv, from, ...)
-       SV *    sv
-       SV *    from
-      CODE:
-       {
-         SV * check = items == 3 ? ST(2) : Nullsv;
-         RETVAL = &PL_sv_undef;
-       }
-      OUTPUT:
-       RETVAL
+PROTOTYPES: DISABLE
 
-SV *
-_chars_to_bytes(sv, to, ...)
-       SV *    sv
-       SV *    to
-      CODE:
-       {
-         SV * check = items == 3 ? ST(2) : Nullsv;
-         RETVAL = &PL_sv_undef;
-       }
-      OUTPUT:
-       RETVAL
 
-SV *
-_from_to(sv, from, to, ...)
-       SV *    sv
-       SV *    from
-       SV *    to
-      CODE:
-       {
-         SV * check = items == 4 ? ST(3) : Nullsv;
-         RETVAL = &PL_sv_undef;
-       }
-      OUTPUT:
-       RETVAL
+int
+DIE_ON_ERR()
+CODE:
+    RETVAL = ENCODE_DIE_ON_ERR;
+OUTPUT:
+    RETVAL
 
-bool
-_is_utf8(sv, ...)
-       SV *    sv
-      CODE:
-       {
-         SV *  check = items == 2 ? ST(1) : Nullsv;
-         if (SvPOK(sv)) {
-           RETVAL = SvUTF8(sv) ? 1 : 0;
-           if (RETVAL &&
-               SvTRUE(check) &&
-               !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
-             RETVAL = FALSE;
-         } else {
-           RETVAL = FALSE;
-         }
-       }
-      OUTPUT:
-       RETVAL
+int
+WARN_ON_ERR()
+CODE:
+    RETVAL = ENCODE_WARN_ON_ERR;
+OUTPUT:
+    RETVAL
 
-SV *
-_on_utf8(sv)
-       SV *    sv
-      CODE:
-       {
-         if (SvPOK(sv)) {
-           SV *rsv = newSViv(SvUTF8(sv));
-           RETVAL = rsv;
-           SvUTF8_on(sv);
-         } else {
-           RETVAL = &PL_sv_undef;
-         }
-       }
-      OUTPUT:
-       RETVAL
+int
+LEAVE_SRC()
+CODE:
+    RETVAL = ENCODE_LEAVE_SRC;
+OUTPUT:
+    RETVAL
 
-SV *
-_off_utf8(sv)
-       SV *    sv
-      CODE:
-       {
-         if (SvPOK(sv)) {
-           SV *rsv = newSViv(SvUTF8(sv));
-           RETVAL = rsv;
-           SvUTF8_off(sv);
-         } else {
-           RETVAL = &PL_sv_undef;
-         }
-       }
-      OUTPUT:
-       RETVAL
+int
+RETURN_ON_ERR()
+CODE:
+    RETVAL = ENCODE_RETURN_ON_ERR;
+OUTPUT:
+    RETVAL
 
-SV *
-_utf_to_utf(sv, from, to, ...)
-       SV *    sv
-       SV *    from
-       SV *    to
-      CODE:
-       {
-         SV * check = items == 4 ? ST(3) : Nullsv;
-         RETVAL = &PL_sv_undef;
-       }
-      OUTPUT:
-       RETVAL
+int
+PERLQQ()
+CODE:
+    RETVAL = ENCODE_PERLQQ;
+OUTPUT:
+    RETVAL
+
+int
+FB_DEFAULT()
+CODE:
+    RETVAL = ENCODE_FB_DEFAULT;
+OUTPUT:
+    RETVAL
+
+int
+FB_CROAK()
+CODE:
+    RETVAL = ENCODE_FB_CROAK;
+OUTPUT:
+    RETVAL
+
+int
+FB_QUIET()
+CODE:
+    RETVAL = ENCODE_FB_QUIET;
+OUTPUT:
+    RETVAL
+
+int
+FB_WARN()
+CODE:
+    RETVAL = ENCODE_FB_WARN;
+OUTPUT:
+    RETVAL
+
+int
+FB_PERLQQ()
+CODE:
+    RETVAL = ENCODE_FB_PERLQQ;
+OUTPUT:
+    RETVAL
 
 BOOT:
 {
-#ifdef USE_PERLIO
- PerlIO_define_layer(&PerlIO_encode);
-#endif
-#include "iso8859.def"
-#include "EBCDIC.def"
-#include "Symbols.def"
+#include "def_t.h"
+#include "def_t.exh"
 }