1 #define PERL_NO_GET_CONTEXT
7 #define OUR_DEFAULT_FB "Encode::PERLQQ"
9 #if defined(USE_PERLIO)
11 /* Define an encoding "layer" in the perliol.h sense.
13 The layer defined here "inherits" in an object-oriented sense from
14 the "perlio" layer with its PerlIOBuf_* "methods". The
15 implementation is particularly efficient as until Encode settles
16 down there is no point in tryint to tune it.
18 The layer works by overloading the "fill" and "flush" methods.
20 "fill" calls "SUPER::fill" in perl terms, then calls the encode OO
21 perl API to convert the encoded data to UTF-8 form, then copies it
22 back to the buffer. The "base class's" read methods then see the
25 "flush" transforms the UTF-8 data deposited by the "base class's
26 write method in the buffer back into the encoded form using the
27 encode OO perl API, then copies data back into the buffer and calls
30 Note that "flush" is _also_ called for read mode - we still do the
31 (back)-translate so that the base class's "flush" sees the
32 correct number of encoded chars for positioning the seek
33 pointer. (This double translation is the worst performance issue -
34 particularly with all-perl encode engine.)
41 PerlIOBuf base; /* PerlIOBuf stuff */
42 SV *bufsv; /* buffer seen by layers above */
43 SV *dataSV; /* data we have read from layer below */
44 SV *enc; /* the encoding object */
45 SV *chk; /* CHECK in Encode methods */
46 int flags; /* Flags currently just needs lines */
47 int inEncodeCall; /* trap recursive encode calls */
53 PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags)
55 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
56 SV *sv = &PL_sv_undef;
57 PERL_UNUSED_ARG(param);
58 PERL_UNUSED_ARG(flags);
61 /* Not 100% sure stack swap is right thing to do during dup ... */
62 PUSHSTACKi(PERLSI_MAGIC);
68 if (call_method("name", G_SCALAR) == 1) {
81 PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg, PerlIO_funcs *tab)
83 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
85 IV code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv,tab);
88 PUSHSTACKi(PERLSI_MAGIC);
95 if (call_pv("Encode::find_encoding", G_SCALAR) != 1) {
96 /* should never happen */
97 Perl_die(aTHX_ "Encode::find_encoding did not return a value");
104 if (!SvROK(result) || !SvOBJECT(SvRV(result))) {
106 if (ckWARN_d(WARN_IO))
107 Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"",
118 if (call_method("renew",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) {
119 if (ckWARN_d(WARN_IO))
120 Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support renew method",
128 e->enc = newSVsv(result);
132 if (call_method("needs_lines",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) {
133 if (ckWARN_d(WARN_IO))
134 Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support needs_lines",
141 if (SvTRUE(result)) {
142 e->flags |= NEEDS_LINES;
145 PerlIOBase(f)->flags |= PERLIO_F_UTF8;
148 e->chk = newSVsv(get_sv("PerlIO::encoding::fallback", 0));
158 PerlIOEncode_popped(pTHX_ PerlIO * f)
160 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
162 SvREFCNT_dec(e->enc);
166 SvREFCNT_dec(e->bufsv);
170 SvREFCNT_dec(e->dataSV);
174 SvREFCNT_dec(e->chk);
181 PerlIOEncode_get_base(pTHX_ PerlIO * f)
183 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
185 e->base.bufsiz = 1024;
187 e->bufsv = newSV(e->base.bufsiz);
188 sv_setpvn(e->bufsv, "", 0);
190 e->base.buf = (STDCHAR *) SvPVX(e->bufsv);
192 e->base.ptr = e->base.buf;
194 e->base.end = e->base.buf;
195 if (e->base.ptr < e->base.buf
196 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
197 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
198 e->base.buf + SvLEN(e->bufsv));
201 if (SvLEN(e->bufsv) < e->base.bufsiz) {
202 SSize_t poff = e->base.ptr - e->base.buf;
203 SSize_t eoff = e->base.end - e->base.buf;
204 e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz);
205 e->base.ptr = e->base.buf + poff;
206 e->base.end = e->base.buf + eoff;
208 if (e->base.ptr < e->base.buf
209 || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
210 Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
211 e->base.buf + SvLEN(e->bufsv));
218 PerlIOEncode_fill(pTHX_ PerlIO * f)
220 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
226 if (PerlIO_flush(f) != 0)
229 if (!PerlIO_fast_gets(n)) {
230 /* Things get too messy if we don't have a buffer layer
231 push a :perlio to do the job */
233 n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv);
235 Perl_die(aTHX_ "panic: cannot push :perlio for %p",f);
238 PUSHSTACKi(PERLSI_MAGIC);
242 avail = PerlIO_get_cnt(n);
244 avail = PerlIO_fill(n);
246 avail = PerlIO_get_cnt(n);
249 if (!PerlIO_error(n) && PerlIO_eof(n))
253 if (avail > 0 || (e->flags & NEEDS_LINES)) {
254 STDCHAR *ptr = PerlIO_get_ptr(n);
255 SSize_t use = (avail >= 0) ? avail : 0;
259 e->base.ptr = e->base.end = (STDCHAR *) NULL;
260 (void) PerlIOEncode_get_base(aTHX_ f);
262 e->dataSV = newSV(0);
263 if (SvTYPE(e->dataSV) < SVt_PV) {
264 sv_upgrade(e->dataSV,SVt_PV);
266 if (e->flags & NEEDS_LINES) {
267 /* Encoding needs whole lines (e.g. iso-2022-*)
268 search back from end of available data for
271 STDCHAR *nl = ptr+use-1;
278 if (nl >= ptr && *nl == '\n') {
279 /* found a line - take up to and including that */
282 else if (avail > 0) {
283 /* No line, but not EOF - append avail to the pending data */
284 sv_catpvn(e->dataSV, (char*)ptr, use);
285 PerlIO_set_ptrcnt(n, ptr+use, 0);
288 else if (!SvCUR(e->dataSV)) {
292 if (SvCUR(e->dataSV)) {
293 /* something left over from last time - create a normal
294 SV with new data appended
296 if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
297 if (e->flags & NEEDS_LINES) {
298 /* Have to grow buffer */
299 e->base.bufsiz = use + SvCUR(e->dataSV);
300 PerlIOEncode_get_base(aTHX_ f);
303 use = e->base.bufsiz - SvCUR(e->dataSV);
306 sv_catpvn(e->dataSV,(char*)ptr,use);
309 /* Create a "dummy" SV to represent the available data from layer below */
310 if (SvLEN(e->dataSV) && SvPVX_const(e->dataSV)) {
311 Safefree(SvPVX_mutable(e->dataSV));
313 if (use > (SSize_t)e->base.bufsiz) {
314 if (e->flags & NEEDS_LINES) {
315 /* Have to grow buffer */
316 e->base.bufsiz = use;
317 PerlIOEncode_get_base(aTHX_ f);
320 use = e->base.bufsiz;
323 SvPV_set(e->dataSV, (char *) ptr);
324 SvLEN_set(e->dataSV, 0); /* Hands off sv.c - it isn't yours */
325 SvCUR_set(e->dataSV,use);
326 SvPOK_only(e->dataSV);
328 SvUTF8_off(e->dataSV);
334 if (call_method("decode", G_SCALAR) != 1) {
335 Perl_die(aTHX_ "panic: decode did not return a value");
340 /* No cows allowed. */
341 if (SvTHINKFIRST(e->dataSV)) SvPV_force_nolen(e->dataSV);
342 /* Now get translated string (forced to UTF-8) and use as buffer */
344 s = SvPVutf8(uni, len);
345 #ifdef PARANOID_ENCODE_CHECKS
346 if (len && !is_utf8_string((U8*)s,len)) {
347 Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s);
353 /* if decode gave us back dataSV then data may vanish when
354 we do ptrcnt adjust - so take our copy now.
355 (The copy is a pain - need a put-it-here option for decode.)
357 sv_setpvn(e->bufsv,s,len);
358 e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv);
359 e->base.end = e->base.ptr + SvCUR(e->bufsv);
360 PerlIOBase(f)->flags |= PERLIO_F_RDBUF;
363 /* Adjust ptr/cnt not taking anything which
364 did not translate - not clear this is a win */
365 /* compute amount we took */
366 if (!SvPOKp(e->dataSV)) (void)SvPV_force_nolen(e->dataSV);
367 use -= SvCUR(e->dataSV);
368 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
369 /* and as we did not take it it isn't pending */
370 SvCUR_set(e->dataSV,0);
372 /* Got nothing - assume partial character so we need some more */
373 /* Make sure e->dataSV is a normal SV before re-filling as
374 buffer alias will change under us
376 s = SvPV(e->dataSV,len);
377 sv_setpvn(e->dataSV,s,len);
378 PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
386 PerlIOBase(f)->flags |= PERLIO_F_EOF;
389 PerlIOBase(f)->flags |= PERLIO_F_ERROR;
390 Perl_PerlIO_save_errno(aTHX_ f);
400 PerlIOEncode_flush(pTHX_ PerlIO * f)
402 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
411 if ((PerlIOBase(f)->flags & PERLIO_F_WRBUF) && (e->base.ptr > e->base.buf)) {
412 if (e->inEncodeCall) return 0;
413 /* Write case - encode the buffer and write() to layer below */
414 PUSHSTACKi(PERLSI_MAGIC);
419 SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
425 if (call_method("encode", G_SCALAR) != 1) {
427 Perl_die(aTHX_ "panic: encode did not return a value");
434 count = PerlIO_write(PerlIONext(f),s,len);
435 if ((STRLEN)count != len) {
441 if (PerlIO_flush(PerlIONext(f)) != 0) {
444 if (!SvPOKp(e->bufsv) || SvTHINKFIRST(e->bufsv))
445 (void)SvPV_force_nolen(e->bufsv);
446 if ((STDCHAR *)SvPVX(e->bufsv) != e->base.buf) {
447 e->base.ptr = SvEND(e->bufsv);
448 e->base.end = SvPVX(e->bufsv) + (e->base.end-e->base.buf);
449 e->base.buf = (STDCHAR *)SvPVX(e->bufsv);
451 (void)PerlIOEncode_get_base(aTHX_ f);
452 if (SvCUR(e->bufsv)) {
453 /* Did not all translate */
454 e->base.ptr = e->base.buf+SvCUR(e->bufsv);
458 else if ((PerlIOBase(f)->flags & PERLIO_F_RDBUF)) {
460 /* if we have any untranslated stuff then unread that first */
461 /* FIXME - unread is fragile is there a better way ? */
462 if (e->dataSV && SvCUR(e->dataSV)) {
463 s = SvPV(e->dataSV, len);
464 count = PerlIO_unread(PerlIONext(f),s,len);
465 if ((STRLEN)count != len) {
468 SvCUR_set(e->dataSV,0);
470 /* See if there is anything left in the buffer */
471 if (e->base.ptr < e->base.end) {
472 if (e->inEncodeCall) return 0;
473 /* Bother - have unread data.
474 re-encode and unread() to layer below
476 PUSHSTACKi(PERLSI_MAGIC);
479 str = sv_newmortal();
480 sv_upgrade(str, SVt_PV);
481 SvPV_set(str, (char*)e->base.ptr);
483 SvCUR_set(str, e->base.end - e->base.ptr);
492 if (call_method("encode", G_SCALAR) != 1) {
494 Perl_die(aTHX_ "panic: encode did not return a value");
501 count = PerlIO_unread(PerlIONext(f),s,len);
502 if ((STRLEN)count != len) {
510 e->base.ptr = e->base.end = e->base.buf;
511 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
517 PerlIOEncode_close(pTHX_ PerlIO * f)
519 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
521 if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) {
522 /* Discard partial character */
524 SvCUR_set(e->dataSV,0);
526 /* Don't back decode and unread any pending data */
527 e->base.ptr = e->base.end = e->base.buf;
529 code = PerlIOBase_close(aTHX_ f);
531 /* This should only fire for write case */
532 if (e->base.buf && e->base.ptr > e->base.buf) {
533 Perl_croak(aTHX_ "Close with partial character");
535 SvREFCNT_dec(e->bufsv);
541 PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
546 PerlIOEncode_tell(pTHX_ PerlIO * f)
548 PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf);
549 /* Unfortunately the only way to get a position is to (re-)translate,
550 the UTF8 we have in buffer and then ask layer below
553 if (b->buf && b->ptr > b->buf) {
554 Perl_croak(aTHX_ "Cannot tell at partial character");
556 return PerlIO_tell(PerlIONext(f));
560 PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o,
561 CLONE_PARAMS * params, int flags)
563 if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) {
564 PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode);
565 PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode);
567 fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params);
574 PerlIOEncode_write(pTHX_ PerlIO *f, const void *vbuf, Size_t count)
576 PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
577 if (e->flags & NEEDS_LINES) {
579 const char *ptr = (const char *) vbuf;
580 const char *end = ptr+count;
582 const char *nl = ptr;
583 while (nl < end && *nl++ != '\n') /* empty body */;
584 done = PerlIOBuf_write(aTHX_ f, ptr, nl-ptr);
585 if (done != nl-ptr) {
592 if (ptr[-1] == '\n') {
593 if (PerlIOEncode_flush(aTHX_ f) != 0) {
598 return (SSize_t) (ptr - (const char *) vbuf);
601 return PerlIOBuf_write(aTHX_ f, vbuf, count);
605 PerlIO_funcs PerlIO_encode = {
606 sizeof(PerlIO_funcs),
608 sizeof(PerlIOEncode),
609 PERLIO_K_BUFFERED|PERLIO_K_DESTRUCT,
613 NULL, /* binmode - always pop */
628 PerlIOBase_setlinebuf,
629 PerlIOEncode_get_base,
633 PerlIOBuf_set_ptrcnt,
635 #endif /* encode layer */
637 MODULE = PerlIO::encoding PACKAGE = PerlIO::encoding
643 SV *chk = get_sv("PerlIO::encoding::fallback", GV_ADD|GV_ADDMULTI);
645 * we now "use Encode ()" here instead of
646 * PerlIO/encoding.pm. This avoids SEGV when ":encoding()"
647 * is invoked without prior "use Encode". -- dankogai
649 PUSHSTACKi(PERLSI_MAGIC);
650 if (!get_cvs(OUR_DEFAULT_FB, 0)) {
652 /* This would just be an irritant now loading works */
653 Perl_warner(aTHX_ packWARN(WARN_IO), ":encoding without 'use Encode'");
655 /* The SV is magically freed by load_module */
656 load_module(PERL_LOADMOD_NOIMPORT, newSVpvs("Encode"), Nullsv, Nullsv);
657 assert(sp == PL_stack_sp);
661 if (call_pv(OUR_DEFAULT_FB, G_SCALAR) != 1) {
662 /* should never happen */
663 Perl_die(aTHX_ "%s did not return a value",OUR_DEFAULT_FB);
669 PerlIO_define_layer(aTHX_ &PerlIO_encode);