1 #define PERL_NO_GET_CONTEXT
7 #define OUR_DEFAULT_FB "Encode::PERLQQ"
9 #if defined(USE_PERLIO)
11 /* Define an encoding "layer" in the perliol.h sense.
13 The layer defined here "inherits" in an object-oriented sense from
14 the "perlio" layer with its PerlIOBuf_* "methods". The
15 implementation is particularly efficient as until Encode settles
16 down there is no point in tryint to tune it.
18 The layer works by overloading the "fill" and "flush" methods.
20 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO
21 perl API to convert the encoded data to UTF-8 form, then copies it
22 back to the buffer. The "base class's" read methods then see the
25 "flush" transforms the UTF-8 data deposited by the "base class's
26 write method in the buffer back into the encoded form using the
27 encode OO perl API, then copies data back into the buffer and calls
30 Note that "flush" is _also_ called for read mode - we still do the
31 (back)-translate so that the base class's "flush" sees the
32 correct number of encoded chars for positioning the seek
33 pointer. (This double translation is the worst performance issue -
34 particularly with all-perl encode engine.)
41 PerlIOBuf base; /* PerlIOBuf stuff */
42 SV *bufsv; /* buffer seen by layers above */
43 SV *dataSV; /* data we have read from layer below */
44 SV *enc; /* the encoding object */
45 SV *chk; /* CHECK in Encode methods */
46 int flags; /* Flags currently just needs lines */
47 int inEncodeCall; /* trap recursive encode calls */
52 static const MGVTBL PerlIOEncode_tag = { 0, 0, 0, 0, 0, 0, 0, 0 };
55 PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags)
57 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
59 PERL_UNUSED_ARG(flags);
60 /* During cloning, return an undef token object so that _pushed() knows
61 * that it should not call methods and wait for _dup() to actually dup the
65 sv_magicext(sv, NULL, PERL_MAGIC_ext, &PerlIOEncode_tag, 0, 0);
71 /* Not 100% sure stack swap is right thing to do during dup ... */
72 PUSHSTACKi(PERLSI_MAGIC);
78 if (call_method("name", G_SCALAR) == 1) {
91 PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg, PerlIO_funcs *tab)
93 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
95 IV code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv,tab);
98 if (SvTYPE(arg) >= SVt_PVMG
99 && mg_findext(arg, PERL_MAGIC_ext, &PerlIOEncode_tag)) {
106 PUSHSTACKi(PERLSI_MAGIC);
113 if (call_pv("Encode::find_encoding", G_SCALAR) != 1) {
114 /* should never happen */
115 Perl_die(aTHX_ "Encode::find_encoding did not return a value");
122 if (!SvROK(result) || !SvOBJECT(SvRV(result))) {
124 if (ckWARN_d(WARN_IO))
125 Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"",
136 if (call_method("renew",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) {
137 if (ckWARN_d(WARN_IO))
138 Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support renew method",
146 e->enc = newSVsv(result);
150 if (call_method("needs_lines",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) {
151 if (ckWARN_d(WARN_IO))
152 Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support needs_lines",
159 if (SvTRUE(result)) {
160 e->flags |= NEEDS_LINES;
163 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
166 e->chk = newSVsv(get_sv("PerlIO::encoding::fallback", 0));
176 PerlIOEncode_popped(pTHX_ PerlIO * f)
178 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
180 SvREFCNT_dec(e->enc);
184 SvREFCNT_dec(e->bufsv);
188 SvREFCNT_dec(e->dataSV);
192 SvREFCNT_dec(e->chk);
199 PerlIOEncode_get_base(pTHX_ PerlIO * f)
201 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
203 e->base.bufsiz = 1024;
205 e->bufsv = newSV(e->base.bufsiz);
208 e->base.buf = (STDCHAR *) SvPVX(e->bufsv);
210 e->base.ptr = e->base.buf;
212 e->base.end = e->base.buf;
213 if (e->base.ptr < e->base.buf
214 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
215 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
216 e->base.buf + SvLEN(e->bufsv));
219 if (SvLEN(e->bufsv) < e->base.bufsiz) {
220 SSize_t poff = e->base.ptr - e->base.buf;
221 SSize_t eoff = e->base.end - e->base.buf;
222 e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz);
223 e->base.ptr = e->base.buf + poff;
224 e->base.end = e->base.buf + eoff;
226 if (e->base.ptr < e->base.buf
227 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
228 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
229 e->base.buf + SvLEN(e->bufsv));
236 PerlIOEncode_fill(pTHX_ PerlIO * f)
238 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
244 if (PerlIO_flush(f) != 0)
247 if (!PerlIO_fast_gets(n)) {
248 /* Things get too messy if we don't have a buffer layer
249 push a :perlio to do the job */
251 n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv);
253 Perl_die(aTHX_ "panic: cannot push :perlio for %p",f);
256 PUSHSTACKi(PERLSI_MAGIC);
260 avail = PerlIO_get_cnt(n);
262 avail = PerlIO_fill(n);
264 avail = PerlIO_get_cnt(n);
267 if (!PerlIO_error(n) && PerlIO_eof(n))
271 if (avail > 0 || (e->flags & NEEDS_LINES)) {
272 STDCHAR *ptr = PerlIO_get_ptr(n);
273 SSize_t use = (avail >= 0) ? avail : 0;
277 e->base.ptr = e->base.end = (STDCHAR *) NULL;
278 (void) PerlIOEncode_get_base(aTHX_ f);
280 e->dataSV = newSV(0);
281 if (SvTYPE(e->dataSV) < SVt_PV) {
282 sv_upgrade(e->dataSV,SVt_PV);
284 if (e->flags & NEEDS_LINES) {
285 /* Encoding needs whole lines (e.g. iso-2022-*)
286 search back from end of available data for
289 STDCHAR *nl = ptr+use-1;
296 if (nl >= ptr && *nl == '\n') {
297 /* found a line - take up to and including that */
300 else if (avail > 0) {
301 /* No line, but not EOF - append avail to the pending data */
302 sv_catpvn(e->dataSV, (char*)ptr, use);
303 PerlIO_set_ptrcnt(n, ptr+use, 0);
306 else if (!SvCUR(e->dataSV)) {
310 if (!SvCUR(e->dataSV))
311 SvPVCLEAR(e->dataSV);
312 if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
313 if (e->flags & NEEDS_LINES) {
314 /* Have to grow buffer */
315 e->base.bufsiz = use + SvCUR(e->dataSV);
316 PerlIOEncode_get_base(aTHX_ f);
319 use = e->base.bufsiz - SvCUR(e->dataSV);
322 sv_catpvn(e->dataSV,(char*)ptr,use);
323 SvUTF8_off(e->dataSV);
329 if (call_method("decode", G_SCALAR) != 1) {
330 Perl_die(aTHX_ "panic: decode did not return a value");
335 /* No cows allowed. */
336 if (SvTHINKFIRST(e->dataSV)) SvPV_force_nolen(e->dataSV);
337 /* Now get translated string (forced to UTF-8) and use as buffer */
339 s = SvPVutf8(uni, len);
340 #ifdef PARANOID_ENCODE_CHECKS
341 if (len && !is_utf8_string((U8*)s,len)) {
342 Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s);
348 /* if decode gave us back dataSV then data may vanish when
349 we do ptrcnt adjust - so take our copy now.
350 (The copy is a pain - need a put-it-here option for decode.)
352 sv_setpvn(e->bufsv,s,len);
353 e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv);
354 e->base.end = e->base.ptr + SvCUR(e->bufsv);
355 PerlIOBase(f)->flags |= PERLIO_F_RDBUF;
358 /* Adjust ptr/cnt not taking anything which
359 did not translate - not clear this is a win */
360 /* compute amount we took */
361 if (!SvPOKp(e->dataSV)) (void)SvPV_force_nolen(e->dataSV);
362 use -= SvCUR(e->dataSV);
363 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
364 /* and as we did not take it, it isn't pending */
365 SvCUR_set(e->dataSV,0);
367 /* Got nothing - assume partial character so we need some more */
368 /* Make sure e->dataSV is a normal SV before re-filling as
369 buffer alias will change under us
371 s = SvPV(e->dataSV,len);
372 sv_setpvn(e->dataSV,s,len);
373 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
381 PerlIOBase(f)->flags |= PERLIO_F_EOF;
384 PerlIOBase(f)->flags |= PERLIO_F_ERROR;
385 Perl_PerlIO_save_errno(aTHX_ f);
395 PerlIOEncode_flush(pTHX_ PerlIO * f)
397 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
406 if ((PerlIOBase(f)->flags & PERLIO_F_WRBUF) && (e->base.ptr > e->base.buf)) {
407 if (e->inEncodeCall) return 0;
408 /* Write case - encode the buffer and write() to layer below */
409 PUSHSTACKi(PERLSI_MAGIC);
414 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
420 if (call_method("encode", G_SCALAR) != 1) {
422 Perl_die(aTHX_ "panic: encode did not return a value");
429 count = PerlIO_write(PerlIONext(f),s,len);
430 if ((STRLEN)count != len) {
436 if (PerlIO_flush(PerlIONext(f)) != 0) {
439 if (!SvPOKp(e->bufsv) || SvTHINKFIRST(e->bufsv))
440 (void)SvPV_force_nolen(e->bufsv);
441 if ((STDCHAR *)SvPVX(e->bufsv) != e->base.buf) {
442 e->base.ptr = (STDCHAR *)SvEND(e->bufsv);
443 e->base.end = (STDCHAR *)SvPVX(e->bufsv) + (e->base.end-e->base.buf);
444 e->base.buf = (STDCHAR *)SvPVX(e->bufsv);
446 (void)PerlIOEncode_get_base(aTHX_ f);
447 if (SvCUR(e->bufsv)) {
448 /* Did not all translate */
449 e->base.ptr = e->base.buf+SvCUR(e->bufsv);
453 else if ((PerlIOBase(f)->flags & PERLIO_F_RDBUF)) {
455 /* if we have any untranslated stuff then unread that first */
456 /* FIXME - unread is fragile is there a better way ? */
457 if (e->dataSV && SvCUR(e->dataSV)) {
458 s = SvPV(e->dataSV, len);
459 count = PerlIO_unread(PerlIONext(f),s,len);
460 if ((STRLEN)count != len) {
463 SvCUR_set(e->dataSV,0);
465 /* See if there is anything left in the buffer */
466 if (e->base.ptr < e->base.end) {
467 if (e->inEncodeCall) return 0;
468 /* Bother - have unread data.
469 re-encode and unread() to layer below
471 PUSHSTACKi(PERLSI_MAGIC);
474 str = sv_newmortal();
475 sv_upgrade(str, SVt_PV);
476 SvPV_set(str, (char*)e->base.ptr);
478 SvCUR_set(str, e->base.end - e->base.ptr);
487 if (call_method("encode", G_SCALAR) != 1) {
489 Perl_die(aTHX_ "panic: encode did not return a value");
496 count = PerlIO_unread(PerlIONext(f),s,len);
497 if ((STRLEN)count != len) {
505 e->base.ptr = e->base.end = e->base.buf;
506 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
512 PerlIOEncode_close(pTHX_ PerlIO * f)
514 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
516 if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) {
517 /* Discard partial character */
519 SvCUR_set(e->dataSV,0);
521 /* Don't back decode and unread any pending data */
522 e->base.ptr = e->base.end = e->base.buf;
524 code = PerlIOBase_close(aTHX_ f);
526 /* This should only fire for write case */
527 if (e->base.buf && e->base.ptr > e->base.buf) {
528 Perl_croak(aTHX_ "Close with partial character");
530 SvREFCNT_dec(e->bufsv);
536 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
541 PerlIOEncode_tell(pTHX_ PerlIO * f)
543 PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf);
544 /* Unfortunately the only way to get a position is to (re-)translate,
545 the UTF8 we have in buffer and then ask layer below
548 if (b->buf && b->ptr > b->buf) {
549 Perl_croak(aTHX_ "Cannot tell at partial character");
551 return PerlIO_tell(PerlIONext(f));
555 PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o,
556 CLONE_PARAMS * params, int flags)
558 if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) {
559 PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode);
560 PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode);
562 fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params);
565 fe->chk = PerlIO_sv_dup(aTHX_ oe->chk, params);
572 PerlIOEncode_write(pTHX_ PerlIO *f, const void *vbuf, Size_t count)
574 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
575 if (e->flags & NEEDS_LINES) {
577 const char *ptr = (const char *) vbuf;
578 const char *end = ptr+count;
580 const char *nl = ptr;
581 while (nl < end && *nl++ != '\n') /* empty body */;
582 done = PerlIOBuf_write(aTHX_ f, ptr, nl-ptr);
583 if (done != nl-ptr) {
590 if (ptr[-1] == '\n') {
591 if (PerlIOEncode_flush(aTHX_ f) != 0) {
596 return (SSize_t) (ptr - (const char *) vbuf);
599 return PerlIOBuf_write(aTHX_ f, vbuf, count);
603 static PERLIO_FUNCS_DECL(PerlIO_encode) = {
604 sizeof(PerlIO_funcs),
606 sizeof(PerlIOEncode),
607 PERLIO_K_BUFFERED|PERLIO_K_DESTRUCT,
611 NULL, /* binmode - always pop */
626 PerlIOBase_setlinebuf,
627 PerlIOEncode_get_base,
631 PerlIOBuf_set_ptrcnt,
633 #endif /* encode layer */
635 MODULE = PerlIO::encoding PACKAGE = PerlIO::encoding
641 SV *chk = get_sv("PerlIO::encoding::fallback", GV_ADD|GV_ADDMULTI);
643 * we now "use Encode ()" here instead of
644 * PerlIO/encoding.pm. This avoids SEGV when ":encoding()"
645 * is invoked without prior "use Encode". -- dankogai
647 PUSHSTACKi(PERLSI_MAGIC);
648 if (!get_cvs(OUR_DEFAULT_FB, 0)) {
650 /* This would just be an irritant now loading works */
651 Perl_warner(aTHX_ packWARN(WARN_IO), ":encoding without 'use Encode'");
653 /* The SV is magically freed by load_module */
654 load_module(PERL_LOADMOD_NOIMPORT, newSVpvs("Encode"), Nullsv, Nullsv);
655 assert(sp == PL_stack_sp);
659 if (call_pv(OUR_DEFAULT_FB, G_SCALAR) != 1) {
660 /* should never happen */
661 Perl_die(aTHX_ "%s did not return a value",OUR_DEFAULT_FB);
667 PerlIO_define_layer(aTHX_ PERLIO_FUNCS_CAST(&PerlIO_encode));