2 * $Id: encoding.xs,v 0.3 2002/04/21 22:14:41 dankogai Exp $
5 #define PERL_NO_GET_CONTEXT
11 #define OUR_DEFAULT_FB "Encode::PERLQQ"
13 #if defined(USE_PERLIO) && !defined(USE_SFIO)
15 /* Define an encoding "layer" in the perliol.h sense.
17 The layer defined here "inherits" in an object-oriented sense from
18 the "perlio" layer with its PerlIOBuf_* "methods". The
19 implementation is particularly efficient as until Encode settles
20 down there is no point in tryint to tune it.
22 The layer works by overloading the "fill" and "flush" methods.
24 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO
25 perl API to convert the encoded data to UTF-8 form, then copies it
26 back to the buffer. The "base class's" read methods then see the
29 "flush" transforms the UTF-8 data deposited by the "base class's
30 write method in the buffer back into the encoded form using the
31 encode OO perl API, then copies data back into the buffer and calls
34 Note that "flush" is _also_ called for read mode - we still do the
35 (back)-translate so that the base class's "flush" sees the
36 correct number of encoded chars for positioning the seek
37 pointer. (This double translation is the worst performance issue -
38 particularly with all-perl encode engine.)
45 PerlIOBuf base; /* PerlIOBuf stuff */
46 SV *bufsv; /* buffer seen by layers above */
47 SV *dataSV; /* data we have read from layer below */
48 SV *enc; /* the encoding object */
49 SV *chk; /* CHECK in Encode methods */
50 int flags; /* Flags currently just needs lines */
51 int inEncodeCall; /* trap recursive encode calls */
57 PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags)
59 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
60 SV *sv = &PL_sv_undef;
63 /* Not 100% sure stack swap is right thing to do during dup ... */
64 PUSHSTACKi(PERLSI_MAGIC);
71 if (call_method("name", G_SCALAR) == 1) {
84 PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg, PerlIO_funcs *tab)
86 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
88 IV code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv,tab);
91 PUSHSTACKi(PERLSI_MAGIC);
100 if (call_pv("Encode::find_encoding", G_SCALAR) != 1) {
101 /* should never happen */
102 Perl_die(aTHX_ "Encode::find_encoding did not return a value");
109 if (!SvROK(result) || !SvOBJECT(SvRV(result))) {
111 if (ckWARN_d(WARN_IO))
112 Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"",
123 if (call_method("renew",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) {
124 if (ckWARN_d(WARN_IO))
125 Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support renew method",
133 e->enc = newSVsv(result);
137 if (call_method("needs_lines",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) {
138 if (ckWARN_d(WARN_IO))
139 Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support needs_lines",
146 if (SvTRUE(result)) {
147 e->flags |= NEEDS_LINES;
150 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
153 e->chk = newSVsv(get_sv("PerlIO::encoding::fallback", 0));
163 PerlIOEncode_popped(pTHX_ PerlIO * f)
165 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
167 SvREFCNT_dec(e->enc);
171 SvREFCNT_dec(e->bufsv);
175 SvREFCNT_dec(e->dataSV);
179 SvREFCNT_dec(e->chk);
186 PerlIOEncode_get_base(pTHX_ PerlIO * f)
188 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
190 e->base.bufsiz = 1024;
192 e->bufsv = newSV(e->base.bufsiz);
193 sv_setpvn(e->bufsv, "", 0);
195 e->base.buf = (STDCHAR *) SvPVX(e->bufsv);
197 e->base.ptr = e->base.buf;
199 e->base.end = e->base.buf;
200 if (e->base.ptr < e->base.buf
201 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
202 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
203 e->base.buf + SvLEN(e->bufsv));
206 if (SvLEN(e->bufsv) < e->base.bufsiz) {
207 SSize_t poff = e->base.ptr - e->base.buf;
208 SSize_t eoff = e->base.end - e->base.buf;
209 e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz);
210 e->base.ptr = e->base.buf + poff;
211 e->base.end = e->base.buf + eoff;
213 if (e->base.ptr < e->base.buf
214 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
215 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
216 e->base.buf + SvLEN(e->bufsv));
223 PerlIOEncode_fill(pTHX_ PerlIO * f)
225 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
231 if (PerlIO_flush(f) != 0)
234 if (!PerlIO_fast_gets(n)) {
235 /* Things get too messy if we don't have a buffer layer
236 push a :perlio to do the job */
238 n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv);
240 Perl_die(aTHX_ "panic: cannot push :perlio for %p",f);
243 PUSHSTACKi(PERLSI_MAGIC);
248 avail = PerlIO_get_cnt(n);
250 avail = PerlIO_fill(n);
252 avail = PerlIO_get_cnt(n);
255 if (!PerlIO_error(n) && PerlIO_eof(n))
259 if (avail > 0 || (e->flags & NEEDS_LINES)) {
260 STDCHAR *ptr = PerlIO_get_ptr(n);
261 SSize_t use = (avail >= 0) ? avail : 0;
265 e->base.ptr = e->base.end = (STDCHAR *) NULL;
266 (void) PerlIOEncode_get_base(aTHX_ f);
268 e->dataSV = newSV(0);
269 if (SvTYPE(e->dataSV) < SVt_PV) {
270 sv_upgrade(e->dataSV,SVt_PV);
272 if (e->flags & NEEDS_LINES) {
273 /* Encoding needs whole lines (e.g. iso-2022-*)
274 search back from end of available data for
277 STDCHAR *nl = ptr+use-1;
284 if (nl >= ptr && *nl == '\n') {
285 /* found a line - take up to and including that */
288 else if (avail > 0) {
289 /* No line, but not EOF - append avail to the pending data */
290 sv_catpvn(e->dataSV, (char*)ptr, use);
291 PerlIO_set_ptrcnt(n, ptr+use, 0);
294 else if (!SvCUR(e->dataSV)) {
298 if (SvCUR(e->dataSV)) {
299 /* something left over from last time - create a normal
300 SV with new data appended
302 if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
303 if (e->flags & NEEDS_LINES) {
304 /* Have to grow buffer */
305 e->base.bufsiz = use + SvCUR(e->dataSV);
306 PerlIOEncode_get_base(aTHX_ f);
309 use = e->base.bufsiz - SvCUR(e->dataSV);
312 sv_catpvn(e->dataSV,(char*)ptr,use);
315 /* Create a "dummy" SV to represent the available data from layer below */
316 if (SvLEN(e->dataSV) && SvPVX_const(e->dataSV)) {
317 Safefree(SvPVX_mutable(e->dataSV));
319 if (use > (SSize_t)e->base.bufsiz) {
320 if (e->flags & NEEDS_LINES) {
321 /* Have to grow buffer */
322 e->base.bufsiz = use;
323 PerlIOEncode_get_base(aTHX_ f);
326 use = e->base.bufsiz;
329 SvPV_set(e->dataSV, (char *) ptr);
330 SvLEN_set(e->dataSV, 0); /* Hands off sv.c - it isn't yours */
331 SvCUR_set(e->dataSV,use);
332 SvPOK_only(e->dataSV);
334 SvUTF8_off(e->dataSV);
340 if (call_method("decode", G_SCALAR) != 1) {
341 Perl_die(aTHX_ "panic: decode did not return a value");
346 /* Now get translated string (forced to UTF-8) and use as buffer */
348 s = SvPVutf8(uni, len);
349 #ifdef PARANOID_ENCODE_CHECKS
350 if (len && !is_utf8_string((U8*)s,len)) {
351 Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s);
357 /* if decode gave us back dataSV then data may vanish when
358 we do ptrcnt adjust - so take our copy now.
359 (The copy is a pain - need a put-it-here option for decode.)
361 sv_setpvn(e->bufsv,s,len);
362 e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv);
363 e->base.end = e->base.ptr + SvCUR(e->bufsv);
364 PerlIOBase(f)->flags |= PERLIO_F_RDBUF;
367 /* Adjust ptr/cnt not taking anything which
368 did not translate - not clear this is a win */
369 /* compute amount we took */
370 use -= SvCUR(e->dataSV);
371 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
372 /* and as we did not take it it isn't pending */
373 SvCUR_set(e->dataSV,0);
375 /* Got nothing - assume partial character so we need some more */
376 /* Make sure e->dataSV is a normal SV before re-filling as
377 buffer alias will change under us
379 s = SvPV(e->dataSV,len);
380 sv_setpvn(e->dataSV,s,len);
381 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
389 PerlIOBase(f)->flags |= PERLIO_F_EOF;
391 PerlIOBase(f)->flags |= PERLIO_F_ERROR;
400 PerlIOEncode_flush(pTHX_ PerlIO * f)
402 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
411 if ((PerlIOBase(f)->flags & PERLIO_F_WRBUF) && (e->base.ptr > e->base.buf)) {
412 if (e->inEncodeCall) return 0;
413 /* Write case - encode the buffer and write() to layer below */
414 PUSHSTACKi(PERLSI_MAGIC);
420 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
426 if (call_method("encode", G_SCALAR) != 1) {
428 Perl_die(aTHX_ "panic: encode did not return a value");
435 count = PerlIO_write(PerlIONext(f),s,len);
436 if ((STRLEN)count != len) {
442 if (PerlIO_flush(PerlIONext(f)) != 0) {
445 if (SvCUR(e->bufsv)) {
446 /* Did not all translate */
447 e->base.ptr = e->base.buf+SvCUR(e->bufsv);
451 else if ((PerlIOBase(f)->flags & PERLIO_F_RDBUF)) {
453 /* if we have any untranslated stuff then unread that first */
454 /* FIXME - unread is fragile is there a better way ? */
455 if (e->dataSV && SvCUR(e->dataSV)) {
456 s = SvPV(e->dataSV, len);
457 count = PerlIO_unread(PerlIONext(f),s,len);
458 if ((STRLEN)count != len) {
461 SvCUR_set(e->dataSV,0);
463 /* See if there is anything left in the buffer */
464 if (e->base.ptr < e->base.end) {
465 if (e->inEncodeCall) return 0;
466 /* Bother - have unread data.
467 re-encode and unread() to layer below
469 PUSHSTACKi(PERLSI_MAGIC);
473 str = sv_newmortal();
474 sv_upgrade(str, SVt_PV);
475 SvPV_set(str, (char*)e->base.ptr);
477 SvCUR_set(str, e->base.end - e->base.ptr);
486 if (call_method("encode", G_SCALAR) != 1) {
488 Perl_die(aTHX_ "panic: encode did not return a value");
495 count = PerlIO_unread(PerlIONext(f),s,len);
496 if ((STRLEN)count != len) {
504 e->base.ptr = e->base.end = e->base.buf;
505 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
511 PerlIOEncode_close(pTHX_ PerlIO * f)
513 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
515 if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) {
516 /* Discard partial character */
518 SvCUR_set(e->dataSV,0);
520 /* Don't back decode and unread any pending data */
521 e->base.ptr = e->base.end = e->base.buf;
523 code = PerlIOBase_close(aTHX_ f);
525 /* This should only fire for write case */
526 if (e->base.buf && e->base.ptr > e->base.buf) {
527 Perl_croak(aTHX_ "Close with partial character");
529 SvREFCNT_dec(e->bufsv);
535 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
540 PerlIOEncode_tell(pTHX_ PerlIO * f)
542 PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf);
543 /* Unfortunately the only way to get a postion is to (re-)translate,
544 the UTF8 we have in bufefr and then ask layer below
547 if (b->buf && b->ptr > b->buf) {
548 Perl_croak(aTHX_ "Cannot tell at partial character");
550 return PerlIO_tell(PerlIONext(f));
554 PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o,
555 CLONE_PARAMS * params, int flags)
557 if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) {
558 PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode);
559 PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode);
561 fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params);
568 PerlIOEncode_write(pTHX_ PerlIO *f, const void *vbuf, Size_t count)
570 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
571 if (e->flags & NEEDS_LINES) {
573 const char *ptr = (const char *) vbuf;
574 const char *end = ptr+count;
576 const char *nl = ptr;
577 while (nl < end && *nl++ != '\n') /* empty body */;
578 done = PerlIOBuf_write(aTHX_ f, ptr, nl-ptr);
579 if (done != nl-ptr) {
586 if (ptr[-1] == '\n') {
587 if (PerlIOEncode_flush(aTHX_ f) != 0) {
592 return (SSize_t) (ptr - (const char *) vbuf);
595 return PerlIOBuf_write(aTHX_ f, vbuf, count);
599 PerlIO_funcs PerlIO_encode = {
600 sizeof(PerlIO_funcs),
602 sizeof(PerlIOEncode),
603 PERLIO_K_BUFFERED|PERLIO_K_DESTRUCT,
607 NULL, /* binmode - always pop */
622 PerlIOBase_setlinebuf,
623 PerlIOEncode_get_base,
627 PerlIOBuf_set_ptrcnt,
629 #endif /* encode layer */
631 MODULE = PerlIO::encoding PACKAGE = PerlIO::encoding
637 SV *chk = get_sv("PerlIO::encoding::fallback", GV_ADD|GV_ADDMULTI);
639 * we now "use Encode ()" here instead of
640 * PerlIO/encoding.pm. This avoids SEGV when ":encoding()"
641 * is invoked without prior "use Encode". -- dankogai
643 PUSHSTACKi(PERLSI_MAGIC);
645 if (!get_cvs(OUR_DEFAULT_FB, 0)) {
647 /* This would just be an irritant now loading works */
648 Perl_warner(aTHX_ packWARN(WARN_IO), ":encoding without 'use Encode'");
651 /* Encode needs a lot of stack - it is likely to move ... */
653 /* The SV is magically freed by load_module */
654 load_module(PERL_LOADMOD_NOIMPORT, newSVpvn("Encode", 6), Nullsv, Nullsv);
660 if (call_pv(OUR_DEFAULT_FB, G_SCALAR) != 1) {
661 /* should never happen */
662 Perl_die(aTHX_ "%s did not return a value",OUR_DEFAULT_FB);
668 PerlIO_define_layer(aTHX_ &PerlIO_encode);