This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Update Encode to CPAN version 2.54
[perl5.git] / cpan / Encode / Encode.xs
CommitLineData
85982a32 1/*
b9f6c370 2 $Id: Encode.xs,v 2.24 2013/08/29 16:47:39 dankogai Exp dankogai $
85982a32
JH
3 */
4
fcf2db38 5#define PERL_NO_GET_CONTEXT
2c674647
JH
6#include "EXTERN.h"
7#include "perl.h"
8#include "XSUB.h"
2f2b4ff2
NIS
9#define U8 U8
10#include "encode.h"
10c5ecbb
JH
11
12# define PERLIO_MODNAME "PerlIO::encoding"
0ab8f81e 13# define PERLIO_FILENAME "PerlIO/encoding.pm"
aae85ceb 14
85982a32
JH
15/* set 1 or more to profile. t/encoding.t dumps core because of
16 Perl_warner and PerlIO don't work well */
c6a7db43 17#define ENCODE_XS_PROFILE 0
39cf9a5e 18
85982a32
JH
19/* set 0 to disable floating point to calculate buffer size for
20 encode_method(). 1 is recommended. 2 restores NI-S original */
c6a7db43 21#define ENCODE_XS_USEFP 1
39cf9a5e 22
fcf2db38 23#define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \
2f5768b8 24 Perl_croak(aTHX_ "panic_unimplemented"); \
d1256cb1 25 return (y)0; /* fool picky compilers */ \
87714904 26 }
85982a32 27/**/
011b2d2f 28
7e9a885a
JH
29UNIMPLEMENTED(_encoded_utf8_to_bytes, I32)
30UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
33af2bc7 31
b85802c5
CBW
32#ifdef UTF8_DISALLOW_ILLEGAL_INTERCHANGE
33# define UTF8_ALLOW_STRICT UTF8_DISALLOW_ILLEGAL_INTERCHANGE
34#else
35# define UTF8_ALLOW_STRICT 0
36#endif
37
7f0d54d7
RGS
38#define UTF8_ALLOW_NONSTRICT (UTF8_ALLOW_ANY & \
39 ~(UTF8_ALLOW_CONTINUATION | \
40 UTF8_ALLOW_NON_CONTINUATION | \
41 UTF8_ALLOW_LONG))
42
b0b300a3 43void
aa0053b7 44Encode_XSEncoding(pTHX_ encode_t * enc)
2f2b4ff2 45{
aa0053b7
NIS
46 dSP;
47 HV *stash = gv_stashpv("Encode::XS", TRUE);
48 SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))), stash);
49 int i = 0;
50 PUSHMARK(sp);
51 XPUSHs(sv);
52 while (enc->name[i]) {
d1256cb1
RGS
53 const char *name = enc->name[i++];
54 XPUSHs(sv_2mortal(newSVpvn(name, strlen(name))));
aa0053b7
NIS
55 }
56 PUTBACK;
57 call_pv("Encode::define_encoding", G_DISCARD);
58 SvREFCNT_dec(sv);
2f2b4ff2
NIS
59}
60
aa0053b7
NIS
61void
62call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig)
63{
85982a32 64 /* Exists for breakpointing */
aa0053b7 65}
67e989fb 66
85982a32 67
2fc614e0
JH
68#define ERR_ENCODE_NOMAP "\"\\x{%04" UVxf "}\" does not map to %s"
69#define ERR_DECODE_NOMAP "%s \"\\x%02" UVXf "\" does not map to Unicode"
70
2f2b4ff2 71static SV *
370462a2 72do_fallback_cb(pTHX_ UV ch, SV *fallback_cb)
8e180e82
SP
73{
74 dSP;
75 int argc;
b2deda17 76 SV *retval = newSVpv("",0);
8e180e82
SP
77 ENTER;
78 SAVETMPS;
79 PUSHMARK(sp);
80 XPUSHs(sv_2mortal(newSVnv((UV)ch)));
81 PUTBACK;
82 argc = call_sv(fallback_cb, G_SCALAR);
83 SPAGAIN;
84 if (argc != 1){
370462a2 85 croak("fallback sub must return scalar!");
8e180e82 86 }
b2deda17 87 sv_catsv(retval, POPs);
8e180e82
SP
88 PUTBACK;
89 FREETMPS;
90 LEAVE;
91 return retval;
92}
93
94static SV *
0629a5b3 95encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src,
370462a2
RGS
96 int check, STRLEN * offset, SV * term, int * retcode,
97 SV *fallback_cb)
2f2b4ff2 98{
aa0053b7
NIS
99 STRLEN slen;
100 U8 *s = (U8 *) SvPV(src, slen);
3aececda
NIS
101 STRLEN tlen = slen;
102 STRLEN ddone = 0;
103 STRLEN sdone = 0;
3c49ab08 104 /* We allocate slen+1.
85982a32 105 PerlIO dumps core if this value is smaller than this. */
3c49ab08 106 SV *dst = sv_2mortal(newSV(slen+1));
85982a32
JH
107 U8 *d = (U8 *)SvPVX(dst);
108 STRLEN dlen = SvLEN(dst)-1;
220e2d4e
IH
109 int code = 0;
110 STRLEN trmlen = 0;
cc7dbc11 111 U8 *trm = term ? (U8*) SvPV(term, trmlen) : NULL;
220e2d4e 112
bbf2f5be
CBW
113 if (SvTAINTED(src)) SvTAINTED_on(dst); /* propagate taintedness */
114
220e2d4e
IH
115 if (offset) {
116 s += *offset;
6be7c101 117 if (slen > *offset){ /* safeguard against slen overflow */
d1256cb1 118 slen -= *offset;
6be7c101 119 }else{
d1256cb1 120 slen = 0;
6be7c101 121 }
220e2d4e
IH
122 tlen = slen;
123 }
85982a32 124
6be7c101 125 if (slen == 0){
d1256cb1
RGS
126 SvCUR_set(dst, 0);
127 SvPOK_only(dst);
128 goto ENCODE_END;
85982a32
JH
129 }
130
220e2d4e 131 while( (code = do_encode(dir, s, &slen, d, dlen, &dlen, !check,
d1256cb1 132 trm, trmlen)) )
85982a32 133 {
d1256cb1
RGS
134 SvCUR_set(dst, dlen+ddone);
135 SvPOK_only(dst);
136
137 if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL ||
138 code == ENCODE_FOUND_TERM) {
139 break;
140 }
141 switch (code) {
142 case ENCODE_NOSPACE:
143 {
144 STRLEN more = 0; /* make sure you initialize! */
145 STRLEN sleft;
146 sdone += slen;
147 ddone += dlen;
148 sleft = tlen - sdone;
fcb875d4 149#if ENCODE_XS_PROFILE >= 2
d1256cb1
RGS
150 Perl_warn(aTHX_
151 "more=%d, sdone=%d, sleft=%d, SvLEN(dst)=%d\n",
152 more, sdone, sleft, SvLEN(dst));
fcb875d4 153#endif
d1256cb1 154 if (sdone != 0) { /* has src ever been processed ? */
39cf9a5e 155#if ENCODE_XS_USEFP == 2
d1256cb1
RGS
156 more = (1.0*tlen*SvLEN(dst)+sdone-1)/sdone
157 - SvLEN(dst);
39cf9a5e 158#elif ENCODE_XS_USEFP
d1256cb1 159 more = (STRLEN)((1.0*SvLEN(dst)+1)/sdone * sleft);
39cf9a5e 160#else
d1256cb1
RGS
161 /* safe until SvLEN(dst) == MAX_INT/16 */
162 more = (16*SvLEN(dst)+1)/sdone/16 * sleft;
39cf9a5e 163#endif
d1256cb1
RGS
164 }
165 more += UTF8_MAXLEN; /* insurance policy */
166 d = (U8 *) SvGROW(dst, SvLEN(dst) + more);
167 /* dst need to grow need MORE bytes! */
168 if (ddone >= SvLEN(dst)) {
169 Perl_croak(aTHX_ "Destination couldn't be grown.");
170 }
171 dlen = SvLEN(dst)-ddone-1;
172 d += ddone;
173 s += slen;
174 slen = tlen-sdone;
175 continue;
176 }
177 case ENCODE_NOREP:
178 /* encoding */
179 if (dir == enc->f_utf8) {
180 STRLEN clen;
181 UV ch =
182 utf8n_to_uvuni(s+slen, (SvCUR(src)-slen),
183 &clen, UTF8_ALLOW_ANY|UTF8_CHECK_ONLY);
184 /* if non-representable multibyte prefix at end of current buffer - break*/
185 if (clen > tlen - sdone) break;
186 if (check & ENCODE_DIE_ON_ERR) {
187 Perl_croak(aTHX_ ERR_ENCODE_NOMAP,
188 (UV)ch, enc->name[0]);
189 return &PL_sv_undef; /* never reaches but be safe */
190 }
191 if (check & ENCODE_WARN_ON_ERR){
192 Perl_warner(aTHX_ packWARN(WARN_UTF8),
193 ERR_ENCODE_NOMAP, (UV)ch, enc->name[0]);
194 }
195 if (check & ENCODE_RETURN_ON_ERR){
196 goto ENCODE_SET_SRC;
197 }
198 if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){
199 SV* subchar =
370462a2
RGS
200 (fallback_cb != &PL_sv_undef)
201 ? do_fallback_cb(aTHX_ ch, fallback_cb)
202 : newSVpvf(check & ENCODE_PERLQQ ? "\\x{%04"UVxf"}" :
d1256cb1
RGS
203 check & ENCODE_HTMLCREF ? "&#%" UVuf ";" :
204 "&#x%" UVxf ";", (UV)ch);
b2deda17 205 SvUTF8_off(subchar); /* make sure no decoded string gets in */
d1256cb1
RGS
206 sdone += slen + clen;
207 ddone += dlen + SvCUR(subchar);
208 sv_catsv(dst, subchar);
209 SvREFCNT_dec(subchar);
210 } else {
211 /* fallback char */
212 sdone += slen + clen;
213 ddone += dlen + enc->replen;
214 sv_catpvn(dst, (char*)enc->rep, enc->replen);
215 }
216 }
217 /* decoding */
218 else {
219 if (check & ENCODE_DIE_ON_ERR){
220 Perl_croak(aTHX_ ERR_DECODE_NOMAP,
436c6dd3 221 enc->name[0], (UV)s[slen]);
d1256cb1
RGS
222 return &PL_sv_undef; /* never reaches but be safe */
223 }
224 if (check & ENCODE_WARN_ON_ERR){
225 Perl_warner(
226 aTHX_ packWARN(WARN_UTF8),
227 ERR_DECODE_NOMAP,
436c6dd3 228 enc->name[0], (UV)s[slen]);
d1256cb1
RGS
229 }
230 if (check & ENCODE_RETURN_ON_ERR){
231 goto ENCODE_SET_SRC;
232 }
233 if (check &
234 (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){
235 SV* subchar =
370462a2
RGS
236 (fallback_cb != &PL_sv_undef)
237 ? do_fallback_cb(aTHX_ (UV)s[slen], fallback_cb)
238 : newSVpvf("\\x%02" UVXf, (UV)s[slen]);
d1256cb1
RGS
239 sdone += slen + 1;
240 ddone += dlen + SvCUR(subchar);
241 sv_catsv(dst, subchar);
242 SvREFCNT_dec(subchar);
243 } else {
244 sdone += slen + 1;
245 ddone += dlen + strlen(FBCHAR_UTF8);
246 sv_catpv(dst, FBCHAR_UTF8);
247 }
248 }
249 /* settle variables when fallback */
250 d = (U8 *)SvEND(dst);
b0b300a3 251 dlen = SvLEN(dst) - ddone - 1;
d1256cb1
RGS
252 s = (U8*)SvPVX(src) + sdone;
253 slen = tlen - sdone;
254 break;
255
256 default:
257 Perl_croak(aTHX_ "Unexpected code %d converting %s %s",
258 code, (dir == enc->f_utf8) ? "to" : "from",
259 enc->name[0]);
260 return &PL_sv_undef;
261 }
85982a32
JH
262 }
263 ENCODE_SET_SRC:
ca777f1c 264 if (check && !(check & ENCODE_LEAVE_SRC)){
d1256cb1
RGS
265 sdone = SvCUR(src) - (slen+sdone);
266 if (sdone) {
267 sv_setpvn(src, (char*)s+slen, sdone);
268 }
269 SvCUR_set(src, sdone);
2f2b4ff2 270 }
85982a32 271 /* warn("check = 0x%X, code = 0x%d\n", check, code); */
c6a7db43 272
85982a32
JH
273 SvCUR_set(dst, dlen+ddone);
274 SvPOK_only(dst);
c6a7db43 275
39cf9a5e
DK
276#if ENCODE_XS_PROFILE
277 if (SvCUR(dst) > SvCUR(src)){
d1256cb1
RGS
278 Perl_warn(aTHX_
279 "SvLEN(dst)=%d, SvCUR(dst)=%d. %d bytes unused(%f %%)\n",
280 SvLEN(dst), SvCUR(dst), SvLEN(dst) - SvCUR(dst),
281 (SvLEN(dst) - SvCUR(dst))*1.0/SvLEN(dst)*100.0);
39cf9a5e 282 }
3c49ab08 283#endif
c6a7db43 284
220e2d4e
IH
285 if (offset)
286 *offset += sdone + slen;
287
85982a32 288 ENCODE_END:
0b3236bb 289 *SvEND(dst) = '\0';
220e2d4e 290 if (retcode) *retcode = code;
aa0053b7 291 return dst;
2f2b4ff2
NIS
292}
293
7f0d54d7
RGS
294static bool
295strict_utf8(pTHX_ SV* sv)
296{
297 HV* hv;
298 SV** svp;
299 sv = SvRV(sv);
300 if (!sv || SvTYPE(sv) != SVt_PVHV)
301 return 0;
302 hv = (HV*)sv;
303 svp = hv_fetch(hv, "strict_utf8", 11, 0);
304 if (!svp)
305 return 0;
306 return SvTRUE(*svp);
307}
308
309static U8*
b9627ca0 310process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv,
7f0d54d7
RGS
311 bool encode, bool strict, bool stop_at_partial)
312{
313 UV uv;
314 STRLEN ulen;
b9627ca0
RGS
315 SV *fallback_cb;
316 int check;
317
318 if (SvROK(check_sv)) {
319 /* croak("UTF-8 decoder doesn't support callback CHECK"); */
320 fallback_cb = check_sv;
321 check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as perlqq */
322 }
323 else {
324 fallback_cb = &PL_sv_undef;
325 check = SvIV(check_sv);
326 }
7f0d54d7
RGS
327
328 SvPOK_only(dst);
329 SvCUR_set(dst,0);
330
331 while (s < e) {
332 if (UTF8_IS_INVARIANT(*s)) {
333 sv_catpvn(dst, (char *)s, 1);
334 s++;
335 continue;
336 }
337
338 if (UTF8_IS_START(*s)) {
339 U8 skip = UTF8SKIP(s);
340 if ((s + skip) > e) {
341 /* Partial character */
342 /* XXX could check that rest of bytes are UTF8_IS_CONTINUATION(ch) */
56ff7374 343 if (stop_at_partial || (check & ENCODE_STOP_AT_PARTIAL))
7f0d54d7
RGS
344 break;
345
346 goto malformed_byte;
347 }
348
349 uv = utf8n_to_uvuni(s, e - s, &ulen,
350 UTF8_CHECK_ONLY | (strict ? UTF8_ALLOW_STRICT :
351 UTF8_ALLOW_NONSTRICT)
352 );
353#if 1 /* perl-5.8.6 and older do not check UTF8_ALLOW_LONG */
d1256cb1 354 if (strict && uv > PERL_UNICODE_MAX)
1b9a72a9 355 ulen = (STRLEN) -1;
7f0d54d7
RGS
356#endif
357 if (ulen == -1) {
358 if (strict) {
359 uv = utf8n_to_uvuni(s, e - s, &ulen,
360 UTF8_CHECK_ONLY | UTF8_ALLOW_NONSTRICT);
361 if (ulen == -1)
362 goto malformed_byte;
363 goto malformed;
364 }
365 goto malformed_byte;
366 }
367
368
369 /* Whole char is good */
370 sv_catpvn(dst,(char *)s,skip);
371 s += skip;
372 continue;
373 }
374
375 /* If we get here there is something wrong with alleged UTF-8 */
376 malformed_byte:
377 uv = (UV)*s;
378 ulen = 1;
379
380 malformed:
381 if (check & ENCODE_DIE_ON_ERR){
382 if (encode)
383 Perl_croak(aTHX_ ERR_ENCODE_NOMAP, uv, "utf8");
384 else
385 Perl_croak(aTHX_ ERR_DECODE_NOMAP, "utf8", uv);
386 }
387 if (check & ENCODE_WARN_ON_ERR){
388 if (encode)
389 Perl_warner(aTHX_ packWARN(WARN_UTF8),
390 ERR_ENCODE_NOMAP, uv, "utf8");
391 else
392 Perl_warner(aTHX_ packWARN(WARN_UTF8),
393 ERR_DECODE_NOMAP, "utf8", uv);
394 }
395 if (check & ENCODE_RETURN_ON_ERR) {
396 break;
397 }
398 if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){
b9627ca0
RGS
399 SV* subchar =
400 (fallback_cb != &PL_sv_undef)
401 ? do_fallback_cb(aTHX_ uv, fallback_cb)
402 : newSVpvf(check & ENCODE_PERLQQ
403 ? (ulen == 1 ? "\\x%02" UVXf : "\\x{%04" UVXf "}")
404 : check & ENCODE_HTMLCREF ? "&#%" UVuf ";"
405 : "&#x%" UVxf ";", uv);
406 if (encode){
407 SvUTF8_off(subchar); /* make sure no decoded string gets in */
408 }
7f0d54d7
RGS
409 sv_catsv(dst, subchar);
410 SvREFCNT_dec(subchar);
411 } else {
412 sv_catpv(dst, FBCHAR_UTF8);
413 }
414 s += ulen;
415 }
416 *SvEND(dst) = '\0';
417
418 return s;
419}
420
421
ab3374e4
DK
422MODULE = Encode PACKAGE = Encode::utf8 PREFIX = Method_
423
a0d8a30e
DK
424PROTOTYPES: DISABLE
425
426void
b2deda17 427Method_decode_xs(obj,src,check_sv = &PL_sv_no)
ab3374e4
DK
428SV * obj
429SV * src
b2deda17 430SV * check_sv
e83d9f5f
RGS
431PREINIT:
432 STRLEN slen;
433 U8 *s;
434 U8 *e;
435 SV *dst;
436 bool renewed = 0;
b2deda17 437 int check;
ab3374e4
DK
438CODE:
439{
e83d9f5f 440 dSP; ENTER; SAVETMPS;
45a6a02c 441 if (src == &PL_sv_undef || SvROK(src)) src = sv_2mortal(newSV(0));
e83d9f5f
RGS
442 s = (U8 *) SvPV(src, slen);
443 e = (U8 *) SvEND(src);
b9627ca0 444 check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV(check_sv);
cc836e95 445 /*
7f0d54d7 446 * PerlIO check -- we assume the object is of PerlIO if renewed
cc836e95 447 */
cc836e95
RGS
448 PUSHMARK(sp);
449 XPUSHs(obj);
450 PUTBACK;
451 if (call_method("renewed",G_SCALAR) == 1) {
d1256cb1
RGS
452 SPAGAIN;
453 renewed = (bool)POPi;
454 PUTBACK;
cc836e95 455#if 0
d1256cb1 456 fprintf(stderr, "renewed == %d\n", renewed);
cc836e95 457#endif
cc836e95
RGS
458 }
459 FREETMPS; LEAVE;
460 /* end PerlIO check */
461
ab3374e4 462 if (SvUTF8(src)) {
d1256cb1
RGS
463 s = utf8_to_bytes(s,&slen);
464 if (s) {
465 SvCUR_set(src,slen);
466 SvUTF8_off(src);
467 e = s+slen;
468 }
469 else {
470 croak("Cannot decode string with wide characters");
471 }
ab3374e4 472 }
7f0d54d7 473
1e1ac268 474 dst = sv_2mortal(newSV(slen>0?slen:1)); /* newSV() abhors 0 -- inaba */
b9627ca0 475 s = process_utf8(aTHX_ dst, s, e, check_sv, 0, strict_utf8(aTHX_ obj), renewed);
ab3374e4
DK
476
477 /* Clear out translated part of source unless asked not to */
478 if (check && !(check & ENCODE_LEAVE_SRC)){
d1256cb1
RGS
479 slen = e-s;
480 if (slen) {
481 sv_setpvn(src, (char*)s, slen);
482 }
483 SvCUR_set(src, slen);
ab3374e4
DK
484 }
485 SvUTF8_on(dst);
bbf2f5be 486 if (SvTAINTED(src)) SvTAINTED_on(dst); /* propagate taintedness */
1e1ac268 487 ST(0) = dst;
ab3374e4
DK
488 XSRETURN(1);
489}
490
491void
b2deda17 492Method_encode_xs(obj,src,check_sv = &PL_sv_no)
ab3374e4
DK
493SV * obj
494SV * src
b2deda17 495SV * check_sv
e83d9f5f
RGS
496PREINIT:
497 STRLEN slen;
498 U8 *s;
499 U8 *e;
500 SV *dst;
501 bool renewed = 0;
b2deda17 502 int check;
ab3374e4
DK
503CODE:
504{
b9627ca0 505 check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV(check_sv);
45a6a02c 506 if (src == &PL_sv_undef || SvROK(src)) src = sv_2mortal(newSV(0));
e83d9f5f
RGS
507 s = (U8 *) SvPV(src, slen);
508 e = (U8 *) SvEND(src);
1e1ac268 509 dst = sv_2mortal(newSV(slen>0?slen:1)); /* newSV() abhors 0 -- inaba */
ab3374e4 510 if (SvUTF8(src)) {
d1256cb1
RGS
511 /* Already encoded */
512 if (strict_utf8(aTHX_ obj)) {
b9627ca0 513 s = process_utf8(aTHX_ dst, s, e, check_sv, 1, 1, 0);
d1256cb1 514 }
7f0d54d7
RGS
515 else {
516 /* trust it and just copy the octets */
517 sv_setpvn(dst,(char *)s,(e-s));
d1256cb1 518 s = e;
7f0d54d7 519 }
ab3374e4
DK
520 }
521 else {
522 /* Native bytes - can always encode */
d1256cb1 523 U8 *d = (U8 *) SvGROW(dst, 2*slen+1); /* +1 or assertion will botch */
ab3374e4 524 while (s < e) {
cf0d1c66
JH
525 UV uv = NATIVE_TO_UNI((UV) *s);
526 s++; /* Above expansion of NATIVE_TO_UNI() is safer this way. */
ab3374e4
DK
527 if (UNI_IS_INVARIANT(uv))
528 *d++ = (U8)UTF_TO_NATIVE(uv);
529 else {
530 *d++ = (U8)UTF8_EIGHT_BIT_HI(uv);
531 *d++ = (U8)UTF8_EIGHT_BIT_LO(uv);
532 }
d1256cb1 533 }
ab3374e4
DK
534 SvCUR_set(dst, d- (U8 *)SvPVX(dst));
535 *SvEND(dst) = '\0';
536 }
537
538 /* Clear out translated part of source unless asked not to */
539 if (check && !(check & ENCODE_LEAVE_SRC)){
d1256cb1
RGS
540 slen = e-s;
541 if (slen) {
542 sv_setpvn(src, (char*)s, slen);
543 }
544 SvCUR_set(src, slen);
ab3374e4
DK
545 }
546 SvPOK_only(dst);
547 SvUTF8_off(dst);
bbf2f5be 548 if (SvTAINTED(src)) SvTAINTED_on(dst); /* propagate taintedness */
1e1ac268 549 ST(0) = dst;
ab3374e4
DK
550 XSRETURN(1);
551}
552
50d26985 553MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_
2f2b4ff2
NIS
554
555PROTOTYPES: ENABLE
556
557void
a0d8a30e
DK
558Method_renew(obj)
559SV * obj
560CODE:
561{
562 XSRETURN(1);
563}
564
cc836e95
RGS
565int
566Method_renewed(obj)
567SV * obj
568CODE:
569 RETVAL = 0;
570OUTPUT:
571 RETVAL
572
a0d8a30e 573void
0a95303c
NIS
574Method_name(obj)
575SV * obj
576CODE:
85982a32
JH
577{
578 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
579 ST(0) = sv_2mortal(newSVpvn(enc->name[0],strlen(enc->name[0])));
580 XSRETURN(1);
581}
0a95303c
NIS
582
583void
370462a2 584Method_cat_decode(obj, dst, src, off, term, check_sv = &PL_sv_no)
220e2d4e
IH
585SV * obj
586SV * dst
587SV * src
588SV * off
589SV * term
370462a2 590SV * check_sv
220e2d4e
IH
591CODE:
592{
370462a2
RGS
593 int check;
594 SV *fallback_cb = &PL_sv_undef;
220e2d4e
IH
595 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
596 STRLEN offset = (STRLEN)SvIV(off);
597 int code = 0;
598 if (SvUTF8(src)) {
599 sv_utf8_downgrade(src, FALSE);
600 }
370462a2
RGS
601 if (SvROK(check_sv)){
602 fallback_cb = check_sv;
603 check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */
604 }else{
605 check = SvIV(check_sv);
606 }
220e2d4e 607 sv_catsv(dst, encode_method(aTHX_ enc, enc->t_utf8, src, check,
370462a2 608 &offset, term, &code, fallback_cb));
b162af07 609 SvIV_set(off, (IV)offset);
220e2d4e 610 if (code == ENCODE_FOUND_TERM) {
d1256cb1 611 ST(0) = &PL_sv_yes;
220e2d4e 612 }else{
d1256cb1 613 ST(0) = &PL_sv_no;
220e2d4e
IH
614 }
615 XSRETURN(1);
616}
617
618void
8e180e82 619Method_decode(obj,src,check_sv = &PL_sv_no)
2f2b4ff2
NIS
620SV * obj
621SV * src
8e180e82 622SV * check_sv
2f2b4ff2 623CODE:
aae85ceb 624{
8e180e82 625 int check;
370462a2 626 SV *fallback_cb = &PL_sv_undef;
85982a32 627 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
ab3374e4
DK
628 if (SvUTF8(src)) {
629 sv_utf8_downgrade(src, FALSE);
630 }
8e180e82 631 if (SvROK(check_sv)){
370462a2
RGS
632 fallback_cb = check_sv;
633 check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */
8e180e82 634 }else{
370462a2 635 check = SvIV(check_sv);
8e180e82 636 }
220e2d4e 637 ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check,
370462a2 638 NULL, Nullsv, NULL, fallback_cb);
85982a32 639 SvUTF8_on(ST(0));
aae85ceb
DK
640 XSRETURN(1);
641}
642
643void
8e180e82 644Method_encode(obj,src,check_sv = &PL_sv_no)
aae85ceb 645SV * obj
85982a32 646SV * src
8e180e82 647SV * check_sv
aae85ceb
DK
648CODE:
649{
8e180e82 650 int check;
370462a2 651 SV *fallback_cb = &PL_sv_undef;
85982a32
JH
652 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
653 sv_utf8_upgrade(src);
8e180e82 654 if (SvROK(check_sv)){
370462a2
RGS
655 fallback_cb = check_sv;
656 check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */
8e180e82 657 }else{
370462a2 658 check = SvIV(check_sv);
8e180e82 659 }
220e2d4e 660 ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check,
370462a2 661 NULL, Nullsv, NULL, fallback_cb);
aae85ceb
DK
662 XSRETURN(1);
663}
664
0ab8f81e
JH
665void
666Method_needs_lines(obj)
667SV * obj
668CODE:
669{
b32afa7c 670 /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */
0ab8f81e
JH
671 ST(0) = &PL_sv_no;
672 XSRETURN(1);
673}
674
675void
676Method_perlio_ok(obj)
677SV * obj
678CODE:
679{
b32afa7c 680 /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */
10c5ecbb
JH
681 /* require_pv(PERLIO_FILENAME); */
682
683 eval_pv("require PerlIO::encoding", 0);
684
685 if (SvTRUE(get_sv("@", 0))) {
d1256cb1 686 ST(0) = &PL_sv_no;
10c5ecbb 687 }else{
d1256cb1 688 ST(0) = &PL_sv_yes;
0ab8f81e
JH
689 }
690 XSRETURN(1);
691}
692
742555bd
SP
693void
694Method_mime_name(obj)
695SV * obj
696CODE:
697{
698 encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
699 SV *retval;
700 eval_pv("require Encode::MIME::Name", 0);
701
702 if (SvTRUE(get_sv("@", 0))) {
703 ST(0) = &PL_sv_undef;
704 }else{
705 ENTER;
706 SAVETMPS;
707 PUSHMARK(sp);
708 XPUSHs(sv_2mortal(newSVpvn(enc->name[0], strlen(enc->name[0]))));
709 PUTBACK;
710 call_pv("Encode::MIME::Name::get_mime_name", G_SCALAR);
711 SPAGAIN;
712 retval = newSVsv(POPs);
713 PUTBACK;
714 FREETMPS;
715 LEAVE;
716 /* enc->name[0] */
717 ST(0) = retval;
718 }
719 XSRETURN(1);
720}
721
67e989fb 722MODULE = Encode PACKAGE = Encode
2c674647
JH
723
724PROTOTYPES: ENABLE
725
67e989fb 726I32
2c674647 727_bytes_to_utf8(sv, ...)
85982a32
JH
728SV * sv
729CODE:
730{
731 SV * encoding = items == 2 ? ST(1) : Nullsv;
c6a7db43 732
85982a32
JH
733 if (encoding)
734 RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
735 else {
d1256cb1
RGS
736 STRLEN len;
737 U8* s = (U8*)SvPV(sv, len);
738 U8* converted;
739
740 converted = bytes_to_utf8(s, &len); /* This allocs */
741 sv_setpvn(sv, (char *)converted, len);
742 SvUTF8_on(sv); /* XXX Should we? */
743 Safefree(converted); /* ... so free it */
744 RETVAL = len;
85982a32
JH
745 }
746}
747OUTPUT:
748 RETVAL
2c674647 749
67e989fb 750I32
2c674647 751_utf8_to_bytes(sv, ...)
85982a32
JH
752SV * sv
753CODE:
754{
755 SV * to = items > 1 ? ST(1) : Nullsv;
756 SV * check = items > 2 ? ST(2) : Nullsv;
757
758 if (to) {
d1256cb1 759 RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to));
85982a32 760 } else {
d1256cb1
RGS
761 STRLEN len;
762 U8 *s = (U8*)SvPV(sv, len);
85982a32 763
d1256cb1
RGS
764 RETVAL = 0;
765 if (SvTRUE(check)) {
766 /* Must do things the slow way */
767 U8 *dest;
85982a32 768 /* We need a copy to pass to check() */
2cdf8b6c 769 U8 *src = s;
d1256cb1 770 U8 *send = s + len;
2cdf8b6c 771 U8 *d0;
85982a32 772
d1256cb1 773 New(83, dest, len, U8); /* I think */
2cdf8b6c 774 d0 = dest;
85982a32 775
d1256cb1 776 while (s < send) {
85982a32 777 if (*s < 0x80){
d1256cb1 778 *dest++ = *s++;
85982a32 779 } else {
d1256cb1
RGS
780 STRLEN ulen;
781 UV uv = *s++;
782
783 /* Have to do it all ourselves because of error routine,
784 aargh. */
785 if (!(uv & 0x40)){ goto failure; }
786 if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; }
787 else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; }
788 else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; }
789 else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; }
790 else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; }
791 else if (!(uv & 0x01)) { ulen = 7; uv = 0; }
792 else { ulen = 13; uv = 0; }
793
794 /* Note change to utf8.c variable naming, for variety */
795 while (ulen--) {
796 if ((*s & 0xc0) != 0x80){
797 goto failure;
798 } else {
799 uv = (uv << 6) | (*s++ & 0x3f);
800 }
801 }
802 if (uv > 256) {
803 failure:
804 call_failure(check, s, dest, src);
805 /* Now what happens? */
806 }
807 *dest++ = (U8)uv;
808 }
809 }
2cdf8b6c
NC
810 RETVAL = dest - d0;
811 sv_usepvn(sv, (char *)dest, RETVAL);
812 SvUTF8_off(sv);
d1256cb1
RGS
813 } else {
814 RETVAL = (utf8_to_bytes(s, &len) ? len : 0);
815 }
85982a32
JH
816 }
817}
818OUTPUT:
819 RETVAL
2c674647 820
2c674647 821bool
b2704119 822is_utf8(sv, check = 0)
4411f3b6 823SV * sv
b2704119 824int check
85982a32
JH
825CODE:
826{
827 if (SvGMAGICAL(sv)) /* it could be $1, for example */
d1256cb1 828 sv = newSVsv(sv); /* GMAGIG will be done */
d1256cb1
RGS
829 RETVAL = SvUTF8(sv) ? TRUE : FALSE;
830 if (RETVAL &&
831 check &&
832 !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
833 RETVAL = FALSE;
85982a32 834 if (sv != ST(0))
d1256cb1 835 SvREFCNT_dec(sv); /* it was a temp copy */
85982a32
JH
836}
837OUTPUT:
838 RETVAL
2c674647 839
b9f6c370
CBW
840#ifndef SvIsCOW
841# define SvIsCOW (SvREADONLY(sv) && SvFAKE(sv))
842#endif
843
2c674647 844SV *
4411f3b6 845_utf8_on(sv)
85982a32
JH
846SV * sv
847CODE:
848{
849 if (SvPOK(sv)) {
d1256cb1
RGS
850 SV *rsv = newSViv(SvUTF8(sv));
851 RETVAL = rsv;
b9f6c370 852 if (SvIsCOW(sv)) sv_force_normal(sv);
d1256cb1 853 SvUTF8_on(sv);
85982a32 854 } else {
d1256cb1 855 RETVAL = &PL_sv_undef;
85982a32
JH
856 }
857}
858OUTPUT:
859 RETVAL
2c674647
JH
860
861SV *
4411f3b6 862_utf8_off(sv)
85982a32
JH
863SV * sv
864CODE:
865{
866 if (SvPOK(sv)) {
d1256cb1
RGS
867 SV *rsv = newSViv(SvUTF8(sv));
868 RETVAL = rsv;
b9f6c370 869 if (SvIsCOW(sv)) sv_force_normal(sv);
d1256cb1 870 SvUTF8_off(sv);
85982a32 871 } else {
d1256cb1 872 RETVAL = &PL_sv_undef;
85982a32
JH
873 }
874}
875OUTPUT:
876 RETVAL
877
85982a32
JH
878int
879DIE_ON_ERR()
880CODE:
881 RETVAL = ENCODE_DIE_ON_ERR;
882OUTPUT:
883 RETVAL
884
c6a7db43 885int
85982a32
JH
886WARN_ON_ERR()
887CODE:
888 RETVAL = ENCODE_WARN_ON_ERR;
889OUTPUT:
890 RETVAL
891
892int
893LEAVE_SRC()
894CODE:
895 RETVAL = ENCODE_LEAVE_SRC;
896OUTPUT:
897 RETVAL
898
899int
900RETURN_ON_ERR()
901CODE:
902 RETVAL = ENCODE_RETURN_ON_ERR;
903OUTPUT:
904 RETVAL
905
906int
907PERLQQ()
908CODE:
909 RETVAL = ENCODE_PERLQQ;
910OUTPUT:
911 RETVAL
912
913int
af1f55d9
JH
914HTMLCREF()
915CODE:
916 RETVAL = ENCODE_HTMLCREF;
917OUTPUT:
918 RETVAL
919
920int
921XMLCREF()
922CODE:
923 RETVAL = ENCODE_XMLCREF;
924OUTPUT:
925 RETVAL
926
927int
56ff7374
RGS
928STOP_AT_PARTIAL()
929CODE:
930 RETVAL = ENCODE_STOP_AT_PARTIAL;
931OUTPUT:
932 RETVAL
933
934int
85982a32
JH
935FB_DEFAULT()
936CODE:
937 RETVAL = ENCODE_FB_DEFAULT;
938OUTPUT:
939 RETVAL
940
941int
942FB_CROAK()
943CODE:
944 RETVAL = ENCODE_FB_CROAK;
945OUTPUT:
946 RETVAL
947
948int
949FB_QUIET()
950CODE:
951 RETVAL = ENCODE_FB_QUIET;
952OUTPUT:
953 RETVAL
954
955int
956FB_WARN()
957CODE:
958 RETVAL = ENCODE_FB_WARN;
959OUTPUT:
960 RETVAL
961
962int
963FB_PERLQQ()
964CODE:
965 RETVAL = ENCODE_FB_PERLQQ;
966OUTPUT:
967 RETVAL
2c674647 968
af1f55d9
JH
969int
970FB_HTMLCREF()
971CODE:
972 RETVAL = ENCODE_FB_HTMLCREF;
973OUTPUT:
974 RETVAL
975
976int
977FB_XMLCREF()
978CODE:
979 RETVAL = ENCODE_FB_XMLCREF;
980OUTPUT:
981 RETVAL
982
33af2bc7
NIS
983BOOT:
984{
85982a32 985#include "def_t.h"
e7cbefb8 986#include "def_t.exh"
33af2bc7 987}