Commit | Line | Data |
---|---|---|
85982a32 | 1 | /* |
b5ab1f6f | 2 | $Id: Encode.xs,v 1.56 2003/06/18 09:29:02 dankogai Exp $ |
85982a32 JH |
3 | */ |
4 | ||
fcf2db38 | 5 | #define PERL_NO_GET_CONTEXT |
2c674647 JH |
6 | #include "EXTERN.h" |
7 | #include "perl.h" | |
8 | #include "XSUB.h" | |
2f2b4ff2 NIS |
9 | #define U8 U8 |
10 | #include "encode.h" | |
10c5ecbb JH |
11 | |
12 | # define PERLIO_MODNAME "PerlIO::encoding" | |
0ab8f81e | 13 | # define PERLIO_FILENAME "PerlIO/encoding.pm" |
aae85ceb | 14 | |
85982a32 JH |
15 | /* set 1 or more to profile. t/encoding.t dumps core because of |
16 | Perl_warner and PerlIO don't work well */ | |
c6a7db43 | 17 | #define ENCODE_XS_PROFILE 0 |
39cf9a5e | 18 | |
85982a32 JH |
19 | /* set 0 to disable floating point to calculate buffer size for |
20 | encode_method(). 1 is recommended. 2 restores NI-S original */ | |
c6a7db43 | 21 | #define ENCODE_XS_USEFP 1 |
39cf9a5e | 22 | |
fcf2db38 | 23 | #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \ |
2f5768b8 | 24 | Perl_croak(aTHX_ "panic_unimplemented"); \ |
4a83738a | 25 | return (y)0; /* fool picky compilers */ \ |
87714904 | 26 | } |
85982a32 | 27 | /**/ |
011b2d2f | 28 | |
7e9a885a JH |
29 | UNIMPLEMENTED(_encoded_utf8_to_bytes, I32) |
30 | UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) | |
33af2bc7 | 31 | |
b0b300a3 | 32 | void |
aa0053b7 | 33 | Encode_XSEncoding(pTHX_ encode_t * enc) |
2f2b4ff2 | 34 | { |
aa0053b7 NIS |
35 | dSP; |
36 | HV *stash = gv_stashpv("Encode::XS", TRUE); | |
37 | SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))), stash); | |
38 | int i = 0; | |
39 | PUSHMARK(sp); | |
40 | XPUSHs(sv); | |
41 | while (enc->name[i]) { | |
42 | const char *name = enc->name[i++]; | |
43 | XPUSHs(sv_2mortal(newSVpvn(name, strlen(name)))); | |
44 | } | |
45 | PUTBACK; | |
46 | call_pv("Encode::define_encoding", G_DISCARD); | |
47 | SvREFCNT_dec(sv); | |
2f2b4ff2 NIS |
48 | } |
49 | ||
aa0053b7 NIS |
50 | void |
51 | call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig) | |
52 | { | |
85982a32 | 53 | /* Exists for breakpointing */ |
aa0053b7 | 54 | } |
67e989fb | 55 | |
85982a32 | 56 | |
2fc614e0 JH |
57 | #define ERR_ENCODE_NOMAP "\"\\x{%04" UVxf "}\" does not map to %s" |
58 | #define ERR_DECODE_NOMAP "%s \"\\x%02" UVXf "\" does not map to Unicode" | |
59 | ||
2f2b4ff2 | 60 | static SV * |
aa0053b7 | 61 | encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, |
220e2d4e | 62 | int check, STRLEN * offset, SV * term, int * retcode) |
2f2b4ff2 | 63 | { |
aa0053b7 NIS |
64 | STRLEN slen; |
65 | U8 *s = (U8 *) SvPV(src, slen); | |
3aececda NIS |
66 | STRLEN tlen = slen; |
67 | STRLEN ddone = 0; | |
68 | STRLEN sdone = 0; | |
39cf9a5e | 69 | |
3c49ab08 | 70 | /* We allocate slen+1. |
85982a32 | 71 | PerlIO dumps core if this value is smaller than this. */ |
3c49ab08 | 72 | SV *dst = sv_2mortal(newSV(slen+1)); |
85982a32 JH |
73 | U8 *d = (U8 *)SvPVX(dst); |
74 | STRLEN dlen = SvLEN(dst)-1; | |
220e2d4e IH |
75 | int code = 0; |
76 | STRLEN trmlen = 0; | |
cc7dbc11 | 77 | U8 *trm = term ? (U8*) SvPV(term, trmlen) : NULL; |
220e2d4e IH |
78 | |
79 | if (offset) { | |
80 | s += *offset; | |
6be7c101 JH |
81 | if (slen > *offset){ /* safeguard against slen overflow */ |
82 | slen -= *offset; | |
83 | }else{ | |
84 | slen = 0; | |
85 | } | |
220e2d4e IH |
86 | tlen = slen; |
87 | } | |
85982a32 | 88 | |
6be7c101 | 89 | if (slen == 0){ |
85982a32 JH |
90 | SvCUR_set(dst, 0); |
91 | SvPOK_only(dst); | |
92 | goto ENCODE_END; | |
93 | } | |
94 | ||
220e2d4e IH |
95 | while( (code = do_encode(dir, s, &slen, d, dlen, &dlen, !check, |
96 | trm, trmlen)) ) | |
85982a32 JH |
97 | { |
98 | SvCUR_set(dst, dlen+ddone); | |
99 | SvPOK_only(dst); | |
0b3236bb | 100 | |
220e2d4e IH |
101 | if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL || |
102 | code == ENCODE_FOUND_TERM) { | |
85982a32 JH |
103 | break; |
104 | } | |
105 | switch (code) { | |
106 | case ENCODE_NOSPACE: | |
107 | { | |
108 | STRLEN more = 0; /* make sure you initialize! */ | |
109 | STRLEN sleft; | |
110 | sdone += slen; | |
111 | ddone += dlen; | |
112 | sleft = tlen - sdone; | |
fcb875d4 | 113 | #if ENCODE_XS_PROFILE >= 2 |
85982a32 JH |
114 | Perl_warn(aTHX_ |
115 | "more=%d, sdone=%d, sleft=%d, SvLEN(dst)=%d\n", | |
116 | more, sdone, sleft, SvLEN(dst)); | |
fcb875d4 | 117 | #endif |
85982a32 | 118 | if (sdone != 0) { /* has src ever been processed ? */ |
39cf9a5e | 119 | #if ENCODE_XS_USEFP == 2 |
85982a32 JH |
120 | more = (1.0*tlen*SvLEN(dst)+sdone-1)/sdone |
121 | - SvLEN(dst); | |
39cf9a5e | 122 | #elif ENCODE_XS_USEFP |
6e21dc91 | 123 | more = (STRLEN)((1.0*SvLEN(dst)+1)/sdone * sleft); |
39cf9a5e | 124 | #else |
85982a32 JH |
125 | /* safe until SvLEN(dst) == MAX_INT/16 */ |
126 | more = (16*SvLEN(dst)+1)/sdone/16 * sleft; | |
39cf9a5e | 127 | #endif |
39cf9a5e | 128 | } |
85982a32 JH |
129 | more += UTF8_MAXLEN; /* insurance policy */ |
130 | d = (U8 *) SvGROW(dst, SvLEN(dst) + more); | |
131 | /* dst need to grow need MORE bytes! */ | |
132 | if (ddone >= SvLEN(dst)) { | |
133 | Perl_croak(aTHX_ "Destination couldn't be grown."); | |
134 | } | |
135 | dlen = SvLEN(dst)-ddone-1; | |
136 | d += ddone; | |
137 | s += slen; | |
138 | slen = tlen-sdone; | |
139 | continue; | |
140 | } | |
141 | case ENCODE_NOREP: | |
142 | /* encoding */ | |
c6a7db43 | 143 | if (dir == enc->f_utf8) { |
85982a32 JH |
144 | STRLEN clen; |
145 | UV ch = | |
3e952a88 | 146 | utf8n_to_uvuni(s+slen, (SvCUR(src)-slen), |
b0b300a3 | 147 | &clen, UTF8_ALLOW_ANY|UTF8_CHECK_ONLY); |
85982a32 | 148 | if (check & ENCODE_DIE_ON_ERR) { |
2fc614e0 JH |
149 | Perl_croak(aTHX_ ERR_ENCODE_NOMAP, |
150 | (UV)ch, enc->name[0]); | |
4089adc4 JH |
151 | return &PL_sv_undef; /* never reaches but be safe */ |
152 | } | |
153 | if (check & ENCODE_WARN_ON_ERR){ | |
154 | Perl_warner(aTHX_ packWARN(WARN_UTF8), | |
2fc614e0 | 155 | ERR_ENCODE_NOMAP, (UV)ch, enc->name[0]); |
4089adc4 JH |
156 | } |
157 | if (check & ENCODE_RETURN_ON_ERR){ | |
158 | goto ENCODE_SET_SRC; | |
159 | } | |
160 | if (check & ENCODE_PERLQQ){ | |
161 | SV* perlqq = | |
162 | sv_2mortal(newSVpvf("\\x{%04"UVxf"}", (UV)ch)); | |
163 | sdone += slen + clen; | |
164 | ddone += dlen + SvCUR(perlqq); | |
165 | sv_catsv(dst, perlqq); | |
166 | }else if (check & ENCODE_HTMLCREF){ | |
167 | SV* htmlcref = | |
168 | sv_2mortal(newSVpvf("&#%" UVuf ";", (UV)ch)); | |
169 | sdone += slen + clen; | |
170 | ddone += dlen + SvCUR(htmlcref); | |
171 | sv_catsv(dst, htmlcref); | |
172 | }else if (check & ENCODE_XMLCREF){ | |
173 | SV* xmlcref = | |
174 | sv_2mortal(newSVpvf("&#x%" UVxf ";", (UV)ch)); | |
175 | sdone += slen + clen; | |
176 | ddone += dlen + SvCUR(xmlcref); | |
177 | sv_catsv(dst, xmlcref); | |
178 | } else { | |
179 | /* fallback char */ | |
180 | sdone += slen + clen; | |
181 | ddone += dlen + enc->replen; | |
182 | sv_catpvn(dst, (char*)enc->rep, enc->replen); | |
c6a7db43 | 183 | } |
b2704119 | 184 | } |
85982a32 | 185 | /* decoding */ |
c6a7db43 | 186 | else { |
85982a32 | 187 | if (check & ENCODE_DIE_ON_ERR){ |
2fc614e0 | 188 | Perl_croak(aTHX_ ERR_DECODE_NOMAP, |
436c6dd3 | 189 | enc->name[0], (UV)s[slen]); |
4089adc4 JH |
190 | return &PL_sv_undef; /* never reaches but be safe */ |
191 | } | |
192 | if (check & ENCODE_WARN_ON_ERR){ | |
193 | Perl_warner( | |
194 | aTHX_ packWARN(WARN_UTF8), | |
2fc614e0 | 195 | ERR_DECODE_NOMAP, |
436c6dd3 | 196 | enc->name[0], (UV)s[slen]); |
4089adc4 JH |
197 | } |
198 | if (check & ENCODE_RETURN_ON_ERR){ | |
199 | goto ENCODE_SET_SRC; | |
200 | } | |
201 | if (check & | |
202 | (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ | |
203 | SV* perlqq = | |
204 | sv_2mortal(newSVpvf("\\x%02" UVXf, (UV)s[slen])); | |
205 | sdone += slen + 1; | |
206 | ddone += dlen + SvCUR(perlqq); | |
207 | sv_catsv(dst, perlqq); | |
208 | } else { | |
209 | sdone += slen + 1; | |
210 | ddone += dlen + strlen(FBCHAR_UTF8); | |
211 | sv_catpv(dst, FBCHAR_UTF8); | |
aa0053b7 | 212 | } |
b2704119 | 213 | } |
85982a32 | 214 | /* settle variables when fallback */ |
b0b300a3 JH |
215 | d = (U8 *)SvEND(dst); |
216 | dlen = SvLEN(dst) - ddone - 1; | |
3e952a88 | 217 | s = (U8*)SvPVX(src) + sdone; |
b2704119 JH |
218 | slen = tlen - sdone; |
219 | break; | |
2f2b4ff2 | 220 | |
85982a32 JH |
221 | default: |
222 | Perl_croak(aTHX_ "Unexpected code %d converting %s %s", | |
223 | code, (dir == enc->f_utf8) ? "to" : "from", | |
224 | enc->name[0]); | |
225 | return &PL_sv_undef; | |
aa0053b7 | 226 | } |
85982a32 JH |
227 | } |
228 | ENCODE_SET_SRC: | |
ca777f1c NIS |
229 | if (check && !(check & ENCODE_LEAVE_SRC)){ |
230 | sdone = SvCUR(src) - (slen+sdone); | |
85982a32 JH |
231 | if (sdone) { |
232 | sv_setpvn(src, (char*)s+slen, sdone); | |
aa0053b7 | 233 | } |
85982a32 | 234 | SvCUR_set(src, sdone); |
2f2b4ff2 | 235 | } |
85982a32 | 236 | /* warn("check = 0x%X, code = 0x%d\n", check, code); */ |
c6a7db43 | 237 | |
85982a32 JH |
238 | SvCUR_set(dst, dlen+ddone); |
239 | SvPOK_only(dst); | |
c6a7db43 | 240 | |
39cf9a5e DK |
241 | #if ENCODE_XS_PROFILE |
242 | if (SvCUR(dst) > SvCUR(src)){ | |
85982a32 JH |
243 | Perl_warn(aTHX_ |
244 | "SvLEN(dst)=%d, SvCUR(dst)=%d. %d bytes unused(%f %%)\n", | |
245 | SvLEN(dst), SvCUR(dst), SvLEN(dst) - SvCUR(dst), | |
246 | (SvLEN(dst) - SvCUR(dst))*1.0/SvLEN(dst)*100.0); | |
39cf9a5e | 247 | } |
3c49ab08 | 248 | #endif |
c6a7db43 | 249 | |
220e2d4e IH |
250 | if (offset) |
251 | *offset += sdone + slen; | |
252 | ||
85982a32 | 253 | ENCODE_END: |
0b3236bb | 254 | *SvEND(dst) = '\0'; |
220e2d4e | 255 | if (retcode) *retcode = code; |
aa0053b7 | 256 | return dst; |
2f2b4ff2 NIS |
257 | } |
258 | ||
ab3374e4 DK |
259 | MODULE = Encode PACKAGE = Encode::utf8 PREFIX = Method_ |
260 | ||
a0d8a30e DK |
261 | PROTOTYPES: DISABLE |
262 | ||
263 | void | |
264 | Method_renew(obj) | |
265 | SV * obj | |
266 | CODE: | |
267 | { | |
268 | XSRETURN(1); | |
269 | } | |
270 | ||
ab3374e4 | 271 | void |
b536bf57 | 272 | Method_decode_xs(obj,src,check = 0) |
ab3374e4 DK |
273 | SV * obj |
274 | SV * src | |
275 | int check | |
276 | CODE: | |
277 | { | |
278 | STRLEN slen; | |
279 | U8 *s = (U8 *) SvPV(src, slen); | |
280 | U8 *e = (U8 *) SvEND(src); | |
b536bf57 | 281 | SV *dst = newSV(slen>0?slen:1); /* newSV() abhors 0 -- inaba */ |
ab3374e4 DK |
282 | SvPOK_only(dst); |
283 | SvCUR_set(dst,0); | |
284 | if (SvUTF8(src)) { | |
285 | s = utf8_to_bytes(s,&slen); | |
286 | if (s) { | |
287 | SvCUR_set(src,slen); | |
288 | SvUTF8_off(src); | |
289 | e = s+slen; | |
290 | } | |
291 | else { | |
292 | croak("Cannot decode string with wide characters"); | |
293 | } | |
294 | } | |
295 | while (s < e) { | |
296 | if (UTF8_IS_INVARIANT(*s) || UTF8_IS_START(*s)) { | |
297 | U8 skip = UTF8SKIP(s); | |
298 | if ((s + skip) > e) { | |
299 | /* Partial character - done */ | |
300 | break; | |
301 | } | |
302 | else if (is_utf8_char(s)) { | |
303 | /* Whole char is good */ | |
304 | sv_catpvn(dst,(char *)s,skip); | |
305 | s += skip; | |
306 | continue; | |
307 | } | |
308 | else { | |
309 | /* starts ok but isn't "good" */ | |
310 | } | |
311 | } | |
312 | else { | |
313 | /* Invalid start byte */ | |
314 | } | |
315 | /* If we get here there is something wrong with alleged UTF-8 */ | |
316 | if (check & ENCODE_DIE_ON_ERR){ | |
317 | Perl_croak(aTHX_ ERR_DECODE_NOMAP, "utf8", (UV)*s); | |
318 | XSRETURN(0); | |
319 | } | |
320 | if (check & ENCODE_WARN_ON_ERR){ | |
321 | Perl_warner(aTHX_ packWARN(WARN_UTF8), | |
322 | ERR_DECODE_NOMAP, "utf8", (UV)*s); | |
323 | } | |
324 | if (check & ENCODE_RETURN_ON_ERR) { | |
325 | break; | |
326 | } | |
327 | if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ | |
328 | SV* perlqq = newSVpvf("\\x%02" UVXf, (UV)*s); | |
329 | sv_catsv(dst, perlqq); | |
330 | SvREFCNT_dec(perlqq); | |
331 | } else { | |
332 | sv_catpv(dst, FBCHAR_UTF8); | |
333 | } | |
334 | s++; | |
335 | } | |
336 | *SvEND(dst) = '\0'; | |
337 | ||
338 | /* Clear out translated part of source unless asked not to */ | |
339 | if (check && !(check & ENCODE_LEAVE_SRC)){ | |
340 | slen = e-s; | |
341 | if (slen) { | |
342 | sv_setpvn(src, (char*)s, slen); | |
343 | } | |
344 | SvCUR_set(src, slen); | |
345 | } | |
346 | SvUTF8_on(dst); | |
347 | ST(0) = sv_2mortal(dst); | |
348 | XSRETURN(1); | |
349 | } | |
350 | ||
351 | void | |
b536bf57 | 352 | Method_encode_xs(obj,src,check = 0) |
ab3374e4 DK |
353 | SV * obj |
354 | SV * src | |
355 | int check | |
356 | CODE: | |
357 | { | |
358 | STRLEN slen; | |
359 | U8 *s = (U8 *) SvPV(src, slen); | |
360 | U8 *e = (U8 *) SvEND(src); | |
b536bf57 | 361 | SV *dst = newSV(slen>0?slen:1); /* newSV() abhors 0 -- inaba */ |
ab3374e4 DK |
362 | if (SvUTF8(src)) { |
363 | /* Already encoded - trust it and just copy the octets */ | |
364 | sv_setpvn(dst,(char *)s,(e-s)); | |
365 | s = e; | |
366 | } | |
367 | else { | |
368 | /* Native bytes - can always encode */ | |
b536bf57 | 369 | U8 *d = (U8 *) SvGROW(dst, 2*slen+1); /* +1 or assertion will botch */ |
ab3374e4 DK |
370 | while (s < e) { |
371 | UV uv = NATIVE_TO_UNI((UV) *s++); | |
372 | if (UNI_IS_INVARIANT(uv)) | |
373 | *d++ = (U8)UTF_TO_NATIVE(uv); | |
374 | else { | |
375 | *d++ = (U8)UTF8_EIGHT_BIT_HI(uv); | |
376 | *d++ = (U8)UTF8_EIGHT_BIT_LO(uv); | |
377 | } | |
378 | } | |
379 | SvCUR_set(dst, d- (U8 *)SvPVX(dst)); | |
380 | *SvEND(dst) = '\0'; | |
381 | } | |
382 | ||
383 | /* Clear out translated part of source unless asked not to */ | |
384 | if (check && !(check & ENCODE_LEAVE_SRC)){ | |
385 | slen = e-s; | |
386 | if (slen) { | |
387 | sv_setpvn(src, (char*)s, slen); | |
388 | } | |
389 | SvCUR_set(src, slen); | |
390 | } | |
391 | SvPOK_only(dst); | |
392 | SvUTF8_off(dst); | |
393 | ST(0) = sv_2mortal(dst); | |
394 | XSRETURN(1); | |
395 | } | |
396 | ||
50d26985 | 397 | MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_ |
2f2b4ff2 NIS |
398 | |
399 | PROTOTYPES: ENABLE | |
400 | ||
401 | void | |
a0d8a30e DK |
402 | Method_renew(obj) |
403 | SV * obj | |
404 | CODE: | |
405 | { | |
406 | XSRETURN(1); | |
407 | } | |
408 | ||
409 | void | |
0a95303c NIS |
410 | Method_name(obj) |
411 | SV * obj | |
412 | CODE: | |
85982a32 JH |
413 | { |
414 | encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); | |
415 | ST(0) = sv_2mortal(newSVpvn(enc->name[0],strlen(enc->name[0]))); | |
416 | XSRETURN(1); | |
417 | } | |
0a95303c NIS |
418 | |
419 | void | |
220e2d4e IH |
420 | Method_cat_decode(obj, dst, src, off, term, check = 0) |
421 | SV * obj | |
422 | SV * dst | |
423 | SV * src | |
424 | SV * off | |
425 | SV * term | |
426 | int check | |
427 | CODE: | |
428 | { | |
429 | encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); | |
430 | STRLEN offset = (STRLEN)SvIV(off); | |
431 | int code = 0; | |
432 | if (SvUTF8(src)) { | |
433 | sv_utf8_downgrade(src, FALSE); | |
434 | } | |
435 | sv_catsv(dst, encode_method(aTHX_ enc, enc->t_utf8, src, check, | |
436 | &offset, term, &code)); | |
437 | SvIVX(off) = (IV)offset; | |
438 | if (code == ENCODE_FOUND_TERM) { | |
439 | ST(0) = &PL_sv_yes; | |
440 | }else{ | |
441 | ST(0) = &PL_sv_no; | |
442 | } | |
443 | XSRETURN(1); | |
444 | } | |
445 | ||
446 | void | |
b2704119 | 447 | Method_decode(obj,src,check = 0) |
2f2b4ff2 NIS |
448 | SV * obj |
449 | SV * src | |
b2704119 | 450 | int check |
2f2b4ff2 | 451 | CODE: |
aae85ceb | 452 | { |
85982a32 | 453 | encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); |
ab3374e4 DK |
454 | if (SvUTF8(src)) { |
455 | sv_utf8_downgrade(src, FALSE); | |
456 | } | |
220e2d4e IH |
457 | ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check, |
458 | NULL, Nullsv, NULL); | |
85982a32 | 459 | SvUTF8_on(ST(0)); |
aae85ceb DK |
460 | XSRETURN(1); |
461 | } | |
462 | ||
463 | void | |
85982a32 | 464 | Method_encode(obj,src,check = 0) |
aae85ceb | 465 | SV * obj |
85982a32 JH |
466 | SV * src |
467 | int check | |
aae85ceb DK |
468 | CODE: |
469 | { | |
85982a32 JH |
470 | encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); |
471 | sv_utf8_upgrade(src); | |
220e2d4e IH |
472 | ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check, |
473 | NULL, Nullsv, NULL); | |
aae85ceb DK |
474 | XSRETURN(1); |
475 | } | |
476 | ||
0ab8f81e JH |
477 | void |
478 | Method_needs_lines(obj) | |
479 | SV * obj | |
480 | CODE: | |
481 | { | |
b32afa7c | 482 | /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */ |
0ab8f81e JH |
483 | ST(0) = &PL_sv_no; |
484 | XSRETURN(1); | |
485 | } | |
486 | ||
487 | void | |
488 | Method_perlio_ok(obj) | |
489 | SV * obj | |
490 | CODE: | |
491 | { | |
b32afa7c | 492 | /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */ |
10c5ecbb JH |
493 | /* require_pv(PERLIO_FILENAME); */ |
494 | ||
495 | eval_pv("require PerlIO::encoding", 0); | |
496 | ||
497 | if (SvTRUE(get_sv("@", 0))) { | |
0ab8f81e | 498 | ST(0) = &PL_sv_no; |
10c5ecbb JH |
499 | }else{ |
500 | ST(0) = &PL_sv_yes; | |
0ab8f81e JH |
501 | } |
502 | XSRETURN(1); | |
503 | } | |
504 | ||
67e989fb | 505 | MODULE = Encode PACKAGE = Encode |
2c674647 JH |
506 | |
507 | PROTOTYPES: ENABLE | |
508 | ||
67e989fb | 509 | I32 |
2c674647 | 510 | _bytes_to_utf8(sv, ...) |
85982a32 JH |
511 | SV * sv |
512 | CODE: | |
513 | { | |
514 | SV * encoding = items == 2 ? ST(1) : Nullsv; | |
c6a7db43 | 515 | |
85982a32 JH |
516 | if (encoding) |
517 | RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding)); | |
518 | else { | |
519 | STRLEN len; | |
520 | U8* s = (U8*)SvPV(sv, len); | |
521 | U8* converted; | |
522 | ||
523 | converted = bytes_to_utf8(s, &len); /* This allocs */ | |
524 | sv_setpvn(sv, (char *)converted, len); | |
525 | SvUTF8_on(sv); /* XXX Should we? */ | |
526 | Safefree(converted); /* ... so free it */ | |
527 | RETVAL = len; | |
528 | } | |
529 | } | |
530 | OUTPUT: | |
531 | RETVAL | |
2c674647 | 532 | |
67e989fb | 533 | I32 |
2c674647 | 534 | _utf8_to_bytes(sv, ...) |
85982a32 JH |
535 | SV * sv |
536 | CODE: | |
537 | { | |
538 | SV * to = items > 1 ? ST(1) : Nullsv; | |
539 | SV * check = items > 2 ? ST(2) : Nullsv; | |
540 | ||
541 | if (to) { | |
542 | RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to)); | |
543 | } else { | |
544 | STRLEN len; | |
545 | U8 *s = (U8*)SvPV(sv, len); | |
546 | ||
547 | RETVAL = 0; | |
548 | if (SvTRUE(check)) { | |
549 | /* Must do things the slow way */ | |
550 | U8 *dest; | |
551 | /* We need a copy to pass to check() */ | |
c6a7db43 | 552 | U8 *src = (U8*)savepv((char *)s); |
85982a32 JH |
553 | U8 *send = s + len; |
554 | ||
555 | New(83, dest, len, U8); /* I think */ | |
556 | ||
557 | while (s < send) { | |
558 | if (*s < 0x80){ | |
559 | *dest++ = *s++; | |
560 | } else { | |
561 | STRLEN ulen; | |
562 | UV uv = *s++; | |
563 | ||
564 | /* Have to do it all ourselves because of error routine, | |
565 | aargh. */ | |
566 | if (!(uv & 0x40)){ goto failure; } | |
567 | if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; } | |
568 | else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; } | |
569 | else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; } | |
570 | else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; } | |
571 | else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; } | |
572 | else if (!(uv & 0x01)) { ulen = 7; uv = 0; } | |
573 | else { ulen = 13; uv = 0; } | |
87714904 | 574 | |
85982a32 JH |
575 | /* Note change to utf8.c variable naming, for variety */ |
576 | while (ulen--) { | |
c6a7db43 NIS |
577 | if ((*s & 0xc0) != 0x80){ |
578 | goto failure; | |
85982a32 JH |
579 | } else { |
580 | uv = (uv << 6) | (*s++ & 0x3f); | |
581 | } | |
87714904 | 582 | } |
67e989fb JH |
583 | if (uv > 256) { |
584 | failure: | |
85982a32 JH |
585 | call_failure(check, s, dest, src); |
586 | /* Now what happens? */ | |
67e989fb JH |
587 | } |
588 | *dest++ = (U8)uv; | |
85982a32 JH |
589 | } |
590 | } | |
591 | } else { | |
592 | RETVAL = (utf8_to_bytes(s, &len) ? len : 0); | |
2c674647 | 593 | } |
85982a32 JH |
594 | } |
595 | } | |
596 | OUTPUT: | |
597 | RETVAL | |
2c674647 | 598 | |
2c674647 | 599 | bool |
b2704119 | 600 | is_utf8(sv, check = 0) |
4411f3b6 | 601 | SV * sv |
b2704119 | 602 | int check |
85982a32 JH |
603 | CODE: |
604 | { | |
605 | if (SvGMAGICAL(sv)) /* it could be $1, for example */ | |
606 | sv = newSVsv(sv); /* GMAGIG will be done */ | |
607 | if (SvPOK(sv)) { | |
608 | RETVAL = SvUTF8(sv) ? TRUE : FALSE; | |
609 | if (RETVAL && | |
610 | check && | |
611 | !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv))) | |
2c674647 | 612 | RETVAL = FALSE; |
85982a32 JH |
613 | } else { |
614 | RETVAL = FALSE; | |
615 | } | |
616 | if (sv != ST(0)) | |
617 | SvREFCNT_dec(sv); /* it was a temp copy */ | |
618 | } | |
619 | OUTPUT: | |
620 | RETVAL | |
2c674647 JH |
621 | |
622 | SV * | |
4411f3b6 | 623 | _utf8_on(sv) |
85982a32 JH |
624 | SV * sv |
625 | CODE: | |
626 | { | |
627 | if (SvPOK(sv)) { | |
628 | SV *rsv = newSViv(SvUTF8(sv)); | |
629 | RETVAL = rsv; | |
630 | SvUTF8_on(sv); | |
631 | } else { | |
632 | RETVAL = &PL_sv_undef; | |
633 | } | |
634 | } | |
635 | OUTPUT: | |
636 | RETVAL | |
2c674647 JH |
637 | |
638 | SV * | |
4411f3b6 | 639 | _utf8_off(sv) |
85982a32 JH |
640 | SV * sv |
641 | CODE: | |
642 | { | |
643 | if (SvPOK(sv)) { | |
644 | SV *rsv = newSViv(SvUTF8(sv)); | |
645 | RETVAL = rsv; | |
646 | SvUTF8_off(sv); | |
647 | } else { | |
648 | RETVAL = &PL_sv_undef; | |
649 | } | |
650 | } | |
651 | OUTPUT: | |
652 | RETVAL | |
653 | ||
85982a32 JH |
654 | int |
655 | DIE_ON_ERR() | |
656 | CODE: | |
657 | RETVAL = ENCODE_DIE_ON_ERR; | |
658 | OUTPUT: | |
659 | RETVAL | |
660 | ||
c6a7db43 | 661 | int |
85982a32 JH |
662 | WARN_ON_ERR() |
663 | CODE: | |
664 | RETVAL = ENCODE_WARN_ON_ERR; | |
665 | OUTPUT: | |
666 | RETVAL | |
667 | ||
668 | int | |
669 | LEAVE_SRC() | |
670 | CODE: | |
671 | RETVAL = ENCODE_LEAVE_SRC; | |
672 | OUTPUT: | |
673 | RETVAL | |
674 | ||
675 | int | |
676 | RETURN_ON_ERR() | |
677 | CODE: | |
678 | RETVAL = ENCODE_RETURN_ON_ERR; | |
679 | OUTPUT: | |
680 | RETVAL | |
681 | ||
682 | int | |
683 | PERLQQ() | |
684 | CODE: | |
685 | RETVAL = ENCODE_PERLQQ; | |
686 | OUTPUT: | |
687 | RETVAL | |
688 | ||
689 | int | |
af1f55d9 JH |
690 | HTMLCREF() |
691 | CODE: | |
692 | RETVAL = ENCODE_HTMLCREF; | |
693 | OUTPUT: | |
694 | RETVAL | |
695 | ||
696 | int | |
697 | XMLCREF() | |
698 | CODE: | |
699 | RETVAL = ENCODE_XMLCREF; | |
700 | OUTPUT: | |
701 | RETVAL | |
702 | ||
703 | int | |
85982a32 JH |
704 | FB_DEFAULT() |
705 | CODE: | |
706 | RETVAL = ENCODE_FB_DEFAULT; | |
707 | OUTPUT: | |
708 | RETVAL | |
709 | ||
710 | int | |
711 | FB_CROAK() | |
712 | CODE: | |
713 | RETVAL = ENCODE_FB_CROAK; | |
714 | OUTPUT: | |
715 | RETVAL | |
716 | ||
717 | int | |
718 | FB_QUIET() | |
719 | CODE: | |
720 | RETVAL = ENCODE_FB_QUIET; | |
721 | OUTPUT: | |
722 | RETVAL | |
723 | ||
724 | int | |
725 | FB_WARN() | |
726 | CODE: | |
727 | RETVAL = ENCODE_FB_WARN; | |
728 | OUTPUT: | |
729 | RETVAL | |
730 | ||
731 | int | |
732 | FB_PERLQQ() | |
733 | CODE: | |
734 | RETVAL = ENCODE_FB_PERLQQ; | |
735 | OUTPUT: | |
736 | RETVAL | |
2c674647 | 737 | |
af1f55d9 JH |
738 | int |
739 | FB_HTMLCREF() | |
740 | CODE: | |
741 | RETVAL = ENCODE_FB_HTMLCREF; | |
742 | OUTPUT: | |
743 | RETVAL | |
744 | ||
745 | int | |
746 | FB_XMLCREF() | |
747 | CODE: | |
748 | RETVAL = ENCODE_FB_XMLCREF; | |
749 | OUTPUT: | |
750 | RETVAL | |
751 | ||
33af2bc7 NIS |
752 | BOOT: |
753 | { | |
85982a32 | 754 | #include "def_t.h" |
e7cbefb8 | 755 | #include "def_t.exh" |
33af2bc7 | 756 | } |