X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/0844c848650aa2551dce1a55f02e43855fb98df9..63b38b604ef39ee14b693bccd8b54d416e832d2c:/ext/Encode/Encode.xs diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs index fce3ca4..992fbfe 100644 --- a/ext/Encode/Encode.xs +++ b/ext/Encode/Encode.xs @@ -1,481 +1,267 @@ +/* + $Id: Encode.xs,v 1.31 2002/04/20 23:43:47 dankogai Exp dankogai $ + */ + +#define PERL_NO_GET_CONTEXT #include "EXTERN.h" #include "perl.h" #include "XSUB.h" #define U8 U8 #include "encode.h" -#include "iso8859.h" -#include "EBCDIC.h" -#include "Symbols.h" -#define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) { \ +/* set 1 or more to profile. t/encoding.t dumps core because of + Perl_warner and PerlIO don't work well */ +#define ENCODE_XS_PROFILE 0 + +/* set 0 to disable floating point to calculate buffer size for + encode_method(). 1 is recommended. 2 restores NI-S original */ +#define ENCODE_XS_USEFP 1 + +#define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \ Perl_croak(aTHX_ "panic_unimplemented"); \ return (y)0; /* fool picky compilers */ \ } -UNIMPLEMENTED(_encoded_utf8_to_bytes, I32) -UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) - -#ifdef USE_PERLIO -/* Define an encoding "layer" in the perliol.h sense. - The layer defined here "inherits" in an object-oriented sense from the - "perlio" layer with its PerlIOBuf_* "methods". - The implementation is particularly efficient as until Encode settles down - there is no point in tryint to tune it. - - The layer works by overloading the "fill" and "flush" methods. - - "fill" calls "SUPER::fill" in perl terms, then calls the encode OO perl API - to convert the encoded data to UTF-8 form, then copies it back to the - buffer. The "base class's" read methods then see the UTF-8 data. - - "flush" transforms the UTF-8 data deposited by the "base class's write - method in the buffer back into the encoded form using the encode OO perl API, - then copies data back into the buffer and calls "SUPER::flush. - - Note that "flush" is _also_ called for read mode - we still do the (back)-translate - so that the the base class's "flush" sees the correct number of encoded chars - for positioning the seek pointer. (This double translation is the worst performance - issue - particularly with all-perl encode engine.) - -*/ +/**/ +UNIMPLEMENTED(_encoded_utf8_to_bytes, I32); +UNIMPLEMENTED(_encoded_bytes_to_utf8, I32); - -#include "perliol.h" - -typedef struct -{ - PerlIOBuf base; /* PerlIOBuf stuff */ - SV * bufsv; - SV * enc; -} PerlIOEncode; - - -IV -PerlIOEncode_pushed(PerlIO *f, const char *mode,const char *arg,STRLEN len) +void +Encode_XSEncoding(pTHX_ encode_t * enc) { - PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); - dTHX; - dSP; - IV code; - code = PerlIOBuf_pushed(f,mode,Nullch,0); - ENTER; - SAVETMPS; - PUSHMARK(sp); - XPUSHs(sv_2mortal(newSVpv("Encode",0))); - XPUSHs(sv_2mortal(newSVpvn(arg,len))); - PUTBACK; - if (perl_call_method("getEncoding",G_SCALAR) != 1) - return -1; - SPAGAIN; - e->enc = POPs; - PUTBACK; - if (!SvROK(e->enc)) - return -1; - SvREFCNT_inc(e->enc); - FREETMPS; - LEAVE; - PerlIOBase(f)->flags |= PERLIO_F_UTF8; - return code; + dSP; + HV *stash = gv_stashpv("Encode::XS", TRUE); + SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))), stash); + int i = 0; + PUSHMARK(sp); + XPUSHs(sv); + while (enc->name[i]) { + const char *name = enc->name[i++]; + XPUSHs(sv_2mortal(newSVpvn(name, strlen(name)))); + } + PUTBACK; + call_pv("Encode::define_encoding", G_DISCARD); + SvREFCNT_dec(sv); } -IV -PerlIOEncode_popped(PerlIO *f) +void +call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig) { - PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); - dTHX; - if (e->enc) - { - SvREFCNT_dec(e->enc); - e->enc = Nullsv; - } - if (e->bufsv) - { - SvREFCNT_dec(e->bufsv); - e->bufsv = Nullsv; - } - return 0; + /* Exists for breakpointing */ } -STDCHAR * -PerlIOEncode_get_base(PerlIO *f) -{ - PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); - dTHX; - if (!e->base.bufsiz) - e->base.bufsiz = 1024; - if (!e->bufsv) - { - e->bufsv = newSV(e->base.bufsiz); - sv_setpvn(e->bufsv,"",0); - } - e->base.buf = (STDCHAR *)SvPVX(e->bufsv); - if (!e->base.ptr) - e->base.ptr = e->base.buf; - if (!e->base.end) - e->base.end = e->base.buf; - if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv)) - { - Perl_warn(aTHX_ " ptr %p(%p)%p", - e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv)); - abort(); - } - if (SvLEN(e->bufsv) < e->base.bufsiz) - { - SSize_t poff = e->base.ptr - e->base.buf; - SSize_t eoff = e->base.end - e->base.buf; - e->base.buf = (STDCHAR *)SvGROW(e->bufsv,e->base.bufsiz); - e->base.ptr = e->base.buf + poff; - e->base.end = e->base.buf + eoff; - } - if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv)) - { - Perl_warn(aTHX_ " ptr %p(%p)%p", - e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv)); - abort(); - } - return e->base.buf; -} -IV -PerlIOEncode_fill(PerlIO *f) +static SV * +encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, + int check) { - PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); - dTHX; - dSP; - IV code; - code = PerlIOBuf_fill(f); - if (code == 0) - { - SV *uni; - STRLEN len; - char *s; - /* Set SV that is the buffer to be buf..ptr */ - SvCUR_set(e->bufsv, e->base.end - e->base.buf); - SvUTF8_off(e->bufsv); - ENTER; - SAVETMPS; - PUSHMARK(sp); - XPUSHs(e->enc); - XPUSHs(e->bufsv); - XPUSHs(&PL_sv_yes); - PUTBACK; - if (perl_call_method("toUnicode",G_SCALAR) != 1) - code = -1; - SPAGAIN; - uni = POPs; - PUTBACK; - /* Now get translated string (forced to UTF-8) and copy back to buffer - don't use sv_setsv as that may "steal" PV from returned temp - and so free() our known-large-enough buffer. - sv_setpvn() should do but let us do it long hand. - */ - s = SvPVutf8(uni,len); - if (s != SvPVX(e->bufsv)) - { - e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len); - Move(s,e->base.buf,len,char); - SvCUR_set(e->bufsv,len); + STRLEN slen; + U8 *s = (U8 *) SvPV(src, slen); + STRLEN tlen = slen; + STRLEN ddone = 0; + STRLEN sdone = 0; + + /* We allocate slen+1. + PerlIO dumps core if this value is smaller than this. */ + SV *dst = sv_2mortal(newSV(slen+1)); + U8 *d = (U8 *)SvPVX(dst); + STRLEN dlen = SvLEN(dst)-1; + int code; + + if (!slen){ + SvCUR_set(dst, 0); + SvPOK_only(dst); + goto ENCODE_END; } - SvUTF8_on(e->bufsv); - e->base.end = e->base.buf+len; - e->base.ptr = e->base.buf; - FREETMPS; - LEAVE; - } - return code; -} -IV -PerlIOEncode_flush(PerlIO *f) -{ - PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); - IV code = 0; - dTHX; - if (e->bufsv && (PerlIOBase(f)->flags & (PERLIO_F_RDBUF|PERLIO_F_WRBUF))) - { - dSP; - SV *str; - char *s; - STRLEN len; - SSize_t left = 0; - if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) + while (code = do_encode(dir, s, &slen, d, dlen, &dlen, !check)) { - /* This is really just a flag to see if we took all the data, if - we did PerlIOBase_flush avoids a seek to lower layer. - Need to revisit if we start getting clever with unreads or seeks-in-buffer - */ - left = e->base.end - e->base.ptr; + SvCUR_set(dst, dlen+ddone); + SvPOK_only(dst); + + if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL){ + break; + } + switch (code) { + case ENCODE_NOSPACE: + { + STRLEN more = 0; /* make sure you initialize! */ + STRLEN sleft; + sdone += slen; + ddone += dlen; + sleft = tlen - sdone; +#if ENCODE_XS_PROFILE >= 2 + Perl_warn(aTHX_ + "more=%d, sdone=%d, sleft=%d, SvLEN(dst)=%d\n", + more, sdone, sleft, SvLEN(dst)); +#endif + if (sdone != 0) { /* has src ever been processed ? */ +#if ENCODE_XS_USEFP == 2 + more = (1.0*tlen*SvLEN(dst)+sdone-1)/sdone + - SvLEN(dst); +#elif ENCODE_XS_USEFP + more = (1.0*SvLEN(dst)+1)/sdone * sleft; +#else + /* safe until SvLEN(dst) == MAX_INT/16 */ + more = (16*SvLEN(dst)+1)/sdone/16 * sleft; +#endif + } + more += UTF8_MAXLEN; /* insurance policy */ + d = (U8 *) SvGROW(dst, SvLEN(dst) + more); + /* dst need to grow need MORE bytes! */ + if (ddone >= SvLEN(dst)) { + Perl_croak(aTHX_ "Destination couldn't be grown."); + } + dlen = SvLEN(dst)-ddone-1; + d += ddone; + s += slen; + slen = tlen-sdone; + continue; + } + case ENCODE_NOREP: + /* encoding */ + if (dir == enc->f_utf8) { + STRLEN clen; + UV ch = + utf8n_to_uvuni(s+slen, (SvCUR(src)-slen), + &clen, UTF8_ALLOW_ANY|UTF8_CHECK_ONLY); + if (check & ENCODE_DIE_ON_ERR) { + Perl_croak( + aTHX_ "\"\\N{U+%" UVxf "}\" does not map to %s, %d", + ch, enc->name[0], __LINE__); + }else{ + if (check & ENCODE_RETURN_ON_ERR){ + if (check & ENCODE_WARN_ON_ERR){ + Perl_warner( + aTHX_ packWARN(WARN_UTF8), + "\"\\N{U+%" UVxf "}\" does not map to %s", + ch,enc->name[0]); + } + goto ENCODE_SET_SRC; + }else if (check & ENCODE_PERLQQ){ + SV* perlqq = + sv_2mortal(newSVpvf("\\x{%04x}", ch)); + sdone += slen + clen; + ddone += dlen + SvCUR(perlqq); + sv_catsv(dst, perlqq); + } else { + /* fallback char */ + sdone += slen + clen; + ddone += dlen + enc->replen; + sv_catpvn(dst, (char*)enc->rep, enc->replen); + } + } + } + /* decoding */ + else { + if (check & ENCODE_DIE_ON_ERR){ + Perl_croak( + aTHX_ "%s \"\\x%02X\" does not map to Unicode (%d)", + enc->name[0], (U8) s[slen], code); + }else{ + if (check & ENCODE_RETURN_ON_ERR){ + if (check & ENCODE_WARN_ON_ERR){ + Perl_warner( + aTHX_ packWARN(WARN_UTF8), + "%s \"\\x%02X\" does not map to Unicode (%d)", + enc->name[0], (U8) s[slen], code); + } + goto ENCODE_SET_SRC; + }else if (check & ENCODE_PERLQQ){ + SV* perlqq = + sv_2mortal(newSVpvf("\\x%02X", s[slen])); + sdone += slen + 1; + ddone += dlen + SvCUR(perlqq); + sv_catsv(dst, perlqq); + } else { + sdone += slen + 1; + ddone += dlen + strlen(FBCHAR_UTF8); + sv_catpv(dst, FBCHAR_UTF8); + } + } + } + /* settle variables when fallback */ + d = (U8 *)SvEND(dst); + dlen = SvLEN(dst) - ddone - 1; + s = (U8*)SvPVX(src) + sdone; + slen = tlen - sdone; + break; + + default: + Perl_croak(aTHX_ "Unexpected code %d converting %s %s", + code, (dir == enc->f_utf8) ? "to" : "from", + enc->name[0]); + return &PL_sv_undef; + } } - ENTER; - SAVETMPS; - PUSHMARK(sp); - XPUSHs(e->enc); - SvCUR_set(e->bufsv, e->base.ptr - e->base.buf); - SvUTF8_on(e->bufsv); - XPUSHs(e->bufsv); - XPUSHs(&PL_sv_yes); - PUTBACK; - if (perl_call_method("fromUnicode",G_SCALAR) != 1) - code = -1; - SPAGAIN; - str = POPs; - PUTBACK; - s = SvPV(str,len); - if (s != SvPVX(e->bufsv)) - { - e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len); - Move(s,e->base.buf,len,char); - SvCUR_set(e->bufsv,len); + ENCODE_SET_SRC: + if (check && !(check & ENCODE_LEAVE_SRC)){ + sdone = SvCUR(src) - (slen+sdone); + if (sdone) { + sv_setpvn(src, (char*)s+slen, sdone); + } + SvCUR_set(src, sdone); + } + /* warn("check = 0x%X, code = 0x%d\n", check, code); */ + if (code && !(check & ENCODE_RETURN_ON_ERR)) { + return &PL_sv_undef; } - SvUTF8_off(e->bufsv); - e->base.ptr = e->base.buf+len; - /* restore end != ptr as inequality is used by PerlIOBuf_flush in read case */ - e->base.end = e->base.ptr + left; - FREETMPS; - LEAVE; - if (PerlIOBuf_flush(f) != 0) - code = -1; - } - return code; -} - -IV -PerlIOEncode_close(PerlIO *f) -{ - PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); - IV code = PerlIOBase_close(f); - dTHX; - if (e->bufsv) - { - SvREFCNT_dec(e->bufsv); - e->bufsv = Nullsv; - } - e->base.buf = NULL; - e->base.ptr = NULL; - e->base.end = NULL; - PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF|PERLIO_F_WRBUF); - return code; -} -Off_t -PerlIOEncode_tell(PerlIO *f) -{ - PerlIOBuf *b = PerlIOSelf(f,PerlIOBuf); - /* Unfortunately the only way to get a postion is to back-translate, - the UTF8-bytes we have buf..ptr and adjust accordingly. - But we will try and save any unread data in case stream - is un-seekable. - */ - if ((PerlIOBase(f)->flags & PERLIO_F_RDBUF) && b->ptr < b->end) - { - Size_t count = b->end - b->ptr; - PerlIO_push(f,&PerlIO_pending,"r",Nullch,0); - /* Save what we have left to read */ - PerlIOSelf(f,PerlIOBuf)->bufsiz = count; - PerlIO_unread(f,b->ptr,count); - /* There isn't any unread data - we just saved it - so avoid the lower seek */ - b->end = b->ptr; - /* Flush ourselves - now one layer down, - this does the back translate and adjusts position - */ - PerlIO_flush(PerlIONext(f)); - /* Set position of the saved data */ - PerlIOSelf(f,PerlIOBuf)->posn = b->posn; - } - else - { - PerlIO_flush(f); - } - return b->posn; -} + SvCUR_set(dst, dlen+ddone); + SvPOK_only(dst); -PerlIO_funcs PerlIO_encode = { - "encoding", - sizeof(PerlIOEncode), - PERLIO_K_BUFFERED, - PerlIOBase_fileno, - PerlIOBuf_fdopen, - PerlIOBuf_open, - PerlIOBuf_reopen, - PerlIOEncode_pushed, - PerlIOEncode_popped, - PerlIOBuf_read, - PerlIOBuf_unread, - PerlIOBuf_write, - PerlIOBuf_seek, - PerlIOEncode_tell, - PerlIOEncode_close, - PerlIOEncode_flush, - PerlIOEncode_fill, - PerlIOBase_eof, - PerlIOBase_error, - PerlIOBase_clearerr, - PerlIOBuf_setlinebuf, - PerlIOEncode_get_base, - PerlIOBuf_bufsiz, - PerlIOBuf_get_ptr, - PerlIOBuf_get_cnt, - PerlIOBuf_set_ptrcnt, -}; +#if ENCODE_XS_PROFILE + if (SvCUR(dst) > SvCUR(src)){ + Perl_warn(aTHX_ + "SvLEN(dst)=%d, SvCUR(dst)=%d. %d bytes unused(%f %%)\n", + SvLEN(dst), SvCUR(dst), SvLEN(dst) - SvCUR(dst), + (SvLEN(dst) - SvCUR(dst))*1.0/SvLEN(dst)*100.0); + } #endif -void -Encode_Define(pTHX_ encode_t *enc) -{ - HV *hash = get_hv("Encode::encoding",GV_ADD|GV_ADDMULTI); - HV *stash = gv_stashpv("Encode::XS", TRUE); - SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash); - hv_store(hash,enc->name,strlen(enc->name),sv,0); + ENCODE_END: + *SvEND(dst) = '\0'; + return dst; } -void call_failure (SV *routine, U8* done, U8* dest, U8* orig) {} +MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_ -static SV * -encode_method(pTHX_ encode_t *enc, encpage_t *dir, SV *src, int check) -{ - STRLEN slen; - U8 *s = (U8 *) SvPV(src,slen); - SV *dst = sv_2mortal(newSV(2*slen+1)); - if (slen) - { - U8 *d = (U8 *) SvGROW(dst, 2*slen+1); - STRLEN dlen = SvLEN(dst); - int code; - while ((code = do_encode(dir,s,&slen,d,dlen,&dlen,!check))) - { - SvCUR_set(dst,dlen); - SvPOK_on(dst); - - if (code == ENCODE_FALLBACK) - break; - - switch(code) - { - case ENCODE_NOSPACE: - { - STRLEN need = (slen) ? (SvLEN(dst)*SvCUR(src)/slen) : (dlen + UTF8_MAXLEN); - if (need <= SvLEN(dst)) - need += UTF8_MAXLEN; - d = (U8 *) SvGROW(dst, need); - dlen = SvLEN(dst); - slen = SvCUR(src); - break; - } - - case ENCODE_NOREP: - if (dir == enc->f_utf8) - { - if (!check && ckWARN_d(WARN_UTF8)) - { - STRLEN clen; - UV ch = utf8_to_uv(s+slen,(SvCUR(src)-slen),&clen,0); - Perl_warner(aTHX_ WARN_UTF8, "\"\\x{%"UVxf"}\" does not map to %s", ch, enc->name); - /* FIXME: Skip over the character, copy in replacement and continue - * but that is messy so for now just fail. - */ - return &PL_sv_undef; - } - else - { - return &PL_sv_undef; - } - } - else - { - /* UTF-8 is supposed to be "Universal" so should not happen */ - Perl_croak(aTHX_ "%s '%.*s' does not map to UTF-8", - enc->name, (int)(SvCUR(src)-slen),s+slen); - } - break; - - case ENCODE_PARTIAL: - if (!check && ckWARN_d(WARN_UTF8)) - { - Perl_warner(aTHX_ WARN_UTF8, "Partial %s character", - (dir == enc->f_utf8) ? "UTF-8" : enc->name); - } - return &PL_sv_undef; - - default: - Perl_croak(aTHX_ "Unexpected code %d converting %s %s", - code, (dir == enc->f_utf8) ? "to" : "from",enc->name); - return &PL_sv_undef; - } - } - SvCUR_set(dst,dlen); - SvPOK_on(dst); - if (check) - { - if (slen < SvCUR(src)) - { - Move(s+slen,s,SvCUR(src)-slen,U8); - } - SvCUR_set(src,SvCUR(src)-slen); - } - } - return dst; -} - -MODULE = Encode PACKAGE = Encode PREFIX = sv_ +PROTOTYPES: ENABLE void -valid_utf8(sv) -SV * sv +Method_name(obj) +SV * obj CODE: - { - STRLEN len; - char *s = SvPV(sv,len); - if (!SvUTF8(sv) || is_utf8_string((U8*)s,len)) - XSRETURN_YES; - else - XSRETURN_NO; - } - -void -sv_utf8_encode(sv) -SV * sv - -bool -sv_utf8_decode(sv) -SV * sv - -void -sv_utf8_upgrade(sv) -SV * sv - -bool -sv_utf8_downgrade(sv,failok=0) -SV * sv -bool failok - -MODULE = Encode PACKAGE = Encode::XS PREFIX = Encode_ - -PROTOTYPES: ENABLE +{ + encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); + ST(0) = sv_2mortal(newSVpvn(enc->name[0],strlen(enc->name[0]))); + XSRETURN(1); +} void -Encode_toUnicode(obj,src,check = 0) +Method_decode(obj,src,check = 0) SV * obj SV * src int check CODE: - { - encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); - ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check); - SvUTF8_on(ST(0)); - XSRETURN(1); - } +{ + encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); + ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check); + SvUTF8_on(ST(0)); + XSRETURN(1); +} void -Encode_fromUnicode(obj,src,check = 0) +Method_encode(obj,src,check = 0) SV * obj SV * src int check CODE: - { - encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); - sv_utf8_upgrade(src); - ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check); - XSRETURN(1); - } +{ + encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); + sv_utf8_upgrade(src); + ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check); + XSRETURN(1); +} MODULE = Encode PACKAGE = Encode @@ -483,234 +269,224 @@ PROTOTYPES: ENABLE I32 _bytes_to_utf8(sv, ...) - SV * sv - CODE: - { - SV * encoding = items == 2 ? ST(1) : Nullsv; - - if (encoding) - RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding)); - else { - STRLEN len; - U8* s = (U8*)SvPV(sv, len); - U8* converted; - - converted = bytes_to_utf8(s, &len); /* This allocs */ - sv_setpvn(sv, (char *)converted, len); - SvUTF8_on(sv); /* XXX Should we? */ - Safefree(converted); /* ... so free it */ - RETVAL = len; - } - } - OUTPUT: - RETVAL +SV * sv +CODE: +{ + SV * encoding = items == 2 ? ST(1) : Nullsv; + + if (encoding) + RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding)); + else { + STRLEN len; + U8* s = (U8*)SvPV(sv, len); + U8* converted; + + converted = bytes_to_utf8(s, &len); /* This allocs */ + sv_setpvn(sv, (char *)converted, len); + SvUTF8_on(sv); /* XXX Should we? */ + Safefree(converted); /* ... so free it */ + RETVAL = len; + } +} +OUTPUT: + RETVAL I32 _utf8_to_bytes(sv, ...) - SV * sv - CODE: - { - SV * to = items > 1 ? ST(1) : Nullsv; - SV * check = items > 2 ? ST(2) : Nullsv; - - if (to) - RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to)); - else { - STRLEN len; - U8 *s = (U8*)SvPV(sv, len); - - if (SvTRUE(check)) { - /* Must do things the slow way */ - U8 *dest; - U8 *src = (U8*)savepv((char *)s); /* We need a copy to pass to check() */ - U8 *send = s + len; - - New(83, dest, len, U8); /* I think */ - - while (s < send) { - if (*s < 0x80) - *dest++ = *s++; - else { - STRLEN ulen; - UV uv = *s++; - - /* Have to do it all ourselves because of error routine, - aargh. */ - if (!(uv & 0x40)) - goto failure; - if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; } - else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; } - else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; } - else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; } - else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; } - else if (!(uv & 0x01)) { ulen = 7; uv = 0; } - else { ulen = 13; uv = 0; } - - /* Note change to utf8.c variable naming, for variety */ - while (ulen--) { - if ((*s & 0xc0) != 0x80) - goto failure; +SV * sv +CODE: +{ + SV * to = items > 1 ? ST(1) : Nullsv; + SV * check = items > 2 ? ST(2) : Nullsv; + + if (to) { + RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to)); + } else { + STRLEN len; + U8 *s = (U8*)SvPV(sv, len); + + RETVAL = 0; + if (SvTRUE(check)) { + /* Must do things the slow way */ + U8 *dest; + /* We need a copy to pass to check() */ + U8 *src = (U8*)savepv((char *)s); + U8 *send = s + len; + + New(83, dest, len, U8); /* I think */ + + while (s < send) { + if (*s < 0x80){ + *dest++ = *s++; + } else { + STRLEN ulen; + UV uv = *s++; + + /* Have to do it all ourselves because of error routine, + aargh. */ + if (!(uv & 0x40)){ goto failure; } + if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; } + else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; } + else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; } + else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; } + else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; } + else if (!(uv & 0x01)) { ulen = 7; uv = 0; } + else { ulen = 13; uv = 0; } - else - uv = (uv << 6) | (*s++ & 0x3f); + /* Note change to utf8.c variable naming, for variety */ + while (ulen--) { + if ((*s & 0xc0) != 0x80){ + goto failure; + } else { + uv = (uv << 6) | (*s++ & 0x3f); + } } if (uv > 256) { failure: - call_failure(check, s, dest, src); - /* Now what happens? */ + call_failure(check, s, dest, src); + /* Now what happens? */ } *dest++ = (U8)uv; - } - } - } else - RETVAL = (utf8_to_bytes(s, &len) ? len : 0); - } + } + } + } else { + RETVAL = (utf8_to_bytes(s, &len) ? len : 0); } - OUTPUT: - RETVAL + } +} +OUTPUT: + RETVAL -SV * -_chars_to_utf8(sv, from, ...) - SV * sv - SV * from - CODE: - { - SV * check = items == 3 ? ST(2) : Nullsv; - RETVAL = &PL_sv_undef; - } - OUTPUT: - RETVAL +bool +is_utf8(sv, check = 0) +SV * sv +int check +CODE: +{ + if (SvGMAGICAL(sv)) /* it could be $1, for example */ + sv = newSVsv(sv); /* GMAGIG will be done */ + if (SvPOK(sv)) { + RETVAL = SvUTF8(sv) ? TRUE : FALSE; + if (RETVAL && + check && + !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv))) + RETVAL = FALSE; + } else { + RETVAL = FALSE; + } + if (sv != ST(0)) + SvREFCNT_dec(sv); /* it was a temp copy */ +} +OUTPUT: + RETVAL SV * -_utf8_to_chars(sv, to, ...) - SV * sv - SV * to - CODE: - { - SV * check = items == 3 ? ST(2) : Nullsv; - RETVAL = &PL_sv_undef; - } - OUTPUT: - RETVAL +_utf8_on(sv) +SV * sv +CODE: +{ + if (SvPOK(sv)) { + SV *rsv = newSViv(SvUTF8(sv)); + RETVAL = rsv; + SvUTF8_on(sv); + } else { + RETVAL = &PL_sv_undef; + } +} +OUTPUT: + RETVAL SV * -_utf8_to_chars_check(sv, ...) - SV * sv - CODE: - { - SV * check = items == 2 ? ST(1) : Nullsv; - RETVAL = &PL_sv_undef; - } - OUTPUT: - RETVAL +_utf8_off(sv) +SV * sv +CODE: +{ + if (SvPOK(sv)) { + SV *rsv = newSViv(SvUTF8(sv)); + RETVAL = rsv; + SvUTF8_off(sv); + } else { + RETVAL = &PL_sv_undef; + } +} +OUTPUT: + RETVAL -SV * -_bytes_to_chars(sv, from, ...) - SV * sv - SV * from - CODE: - { - SV * check = items == 3 ? ST(2) : Nullsv; - RETVAL = &PL_sv_undef; - } - OUTPUT: - RETVAL +PROTOTYPES: DISABLE -SV * -_chars_to_bytes(sv, to, ...) - SV * sv - SV * to - CODE: - { - SV * check = items == 3 ? ST(2) : Nullsv; - RETVAL = &PL_sv_undef; - } - OUTPUT: - RETVAL -SV * -_from_to(sv, from, to, ...) - SV * sv - SV * from - SV * to - CODE: - { - SV * check = items == 4 ? ST(3) : Nullsv; - RETVAL = &PL_sv_undef; - } - OUTPUT: - RETVAL +int +DIE_ON_ERR() +CODE: + RETVAL = ENCODE_DIE_ON_ERR; +OUTPUT: + RETVAL -bool -_is_utf8(sv, ...) - SV * sv - CODE: - { - SV * check = items == 2 ? ST(1) : Nullsv; - if (SvPOK(sv)) { - RETVAL = SvUTF8(sv) ? 1 : 0; - if (RETVAL && - SvTRUE(check) && - !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv))) - RETVAL = FALSE; - } else { - RETVAL = FALSE; - } - } - OUTPUT: - RETVAL +int +WARN_ON_ERR() +CODE: + RETVAL = ENCODE_WARN_ON_ERR; +OUTPUT: + RETVAL -SV * -_on_utf8(sv) - SV * sv - CODE: - { - if (SvPOK(sv)) { - SV *rsv = newSViv(SvUTF8(sv)); - RETVAL = rsv; - SvUTF8_on(sv); - } else { - RETVAL = &PL_sv_undef; - } - } - OUTPUT: - RETVAL +int +LEAVE_SRC() +CODE: + RETVAL = ENCODE_LEAVE_SRC; +OUTPUT: + RETVAL -SV * -_off_utf8(sv) - SV * sv - CODE: - { - if (SvPOK(sv)) { - SV *rsv = newSViv(SvUTF8(sv)); - RETVAL = rsv; - SvUTF8_off(sv); - } else { - RETVAL = &PL_sv_undef; - } - } - OUTPUT: - RETVAL +int +RETURN_ON_ERR() +CODE: + RETVAL = ENCODE_RETURN_ON_ERR; +OUTPUT: + RETVAL -SV * -_utf_to_utf(sv, from, to, ...) - SV * sv - SV * from - SV * to - CODE: - { - SV * check = items == 4 ? ST(3) : Nullsv; - RETVAL = &PL_sv_undef; - } - OUTPUT: - RETVAL +int +PERLQQ() +CODE: + RETVAL = ENCODE_PERLQQ; +OUTPUT: + RETVAL + +int +FB_DEFAULT() +CODE: + RETVAL = ENCODE_FB_DEFAULT; +OUTPUT: + RETVAL + +int +FB_CROAK() +CODE: + RETVAL = ENCODE_FB_CROAK; +OUTPUT: + RETVAL + +int +FB_QUIET() +CODE: + RETVAL = ENCODE_FB_QUIET; +OUTPUT: + RETVAL + +int +FB_WARN() +CODE: + RETVAL = ENCODE_FB_WARN; +OUTPUT: + RETVAL + +int +FB_PERLQQ() +CODE: + RETVAL = ENCODE_FB_PERLQQ; +OUTPUT: + RETVAL BOOT: { -#ifdef USE_PERLIO - PerlIO_define_layer(&PerlIO_encode); -#endif -#include "iso8859.def" -#include "EBCDIC.def" -#include "Symbols.def" +#include "def_t.h" +#include "def_t.exh" }