Commit | Line | Data |
---|---|---|
2c674647 JH |
1 | #include "EXTERN.h" |
2 | #include "perl.h" | |
3 | #include "XSUB.h" | |
2f2b4ff2 NIS |
4 | #define U8 U8 |
5 | #include "encode.h" | |
6 | #include "iso8859.h" | |
7 | #include "EBCDIC.h" | |
8 | #include "Symbols.h" | |
2c674647 | 9 | |
67e989fb | 10 | #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) { \ |
2f5768b8 | 11 | Perl_croak(aTHX_ "panic_unimplemented"); \ |
4a83738a | 12 | return (y)0; /* fool picky compilers */ \ |
87714904 | 13 | } |
67e989fb JH |
14 | UNIMPLEMENTED(_encoded_utf8_to_bytes, I32) |
15 | UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) | |
16 | ||
33af2bc7 | 17 | #ifdef USE_PERLIO |
72e44f29 NIS |
18 | /* Define an encoding "layer" in the perliol.h sense. |
19 | The layer defined here "inherits" in an object-oriented sense from the | |
20 | "perlio" layer with its PerlIOBuf_* "methods". | |
21 | The implementation is particularly efficient as until Encode settles down | |
22 | there is no point in tryint to tune it. | |
23 | ||
24 | The layer works by overloading the "fill" and "flush" methods. | |
25 | ||
26 | "fill" calls "SUPER::fill" in perl terms, then calls the encode OO perl API | |
27 | to convert the encoded data to UTF-8 form, then copies it back to the | |
28 | buffer. The "base class's" read methods then see the UTF-8 data. | |
29 | ||
30 | "flush" transforms the UTF-8 data deposited by the "base class's write | |
31 | method in the buffer back into the encoded form using the encode OO perl API, | |
32 | then copies data back into the buffer and calls "SUPER::flush. | |
33 | ||
34 | Note that "flush" is _also_ called for read mode - we still do the (back)-translate | |
35 | so that the the base class's "flush" sees the correct number of encoded chars | |
36 | for positioning the seek pointer. (This double translation is the worst performance | |
37 | issue - particularly with all-perl encode engine.) | |
38 | ||
39 | */ | |
40 | ||
41 | ||
33af2bc7 NIS |
42 | #include "perliol.h" |
43 | ||
44 | typedef struct | |
45 | { | |
46 | PerlIOBuf base; /* PerlIOBuf stuff */ | |
47 | SV * bufsv; | |
48 | SV * enc; | |
49 | } PerlIOEncode; | |
50 | ||
51 | ||
52 | IV | |
53 | PerlIOEncode_pushed(PerlIO *f, const char *mode,const char *arg,STRLEN len) | |
54 | { | |
55 | PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); | |
56 | dTHX; | |
57 | dSP; | |
58 | IV code; | |
59 | code = PerlIOBuf_pushed(f,mode,Nullch,0); | |
60 | ENTER; | |
61 | SAVETMPS; | |
62 | PUSHMARK(sp); | |
63 | XPUSHs(sv_2mortal(newSVpv("Encode",0))); | |
64 | XPUSHs(sv_2mortal(newSVpvn(arg,len))); | |
65 | PUTBACK; | |
66 | if (perl_call_method("getEncoding",G_SCALAR) != 1) | |
67 | return -1; | |
68 | SPAGAIN; | |
69 | e->enc = POPs; | |
70 | PUTBACK; | |
71 | if (!SvROK(e->enc)) | |
72 | return -1; | |
73 | SvREFCNT_inc(e->enc); | |
74 | FREETMPS; | |
75 | LEAVE; | |
76 | PerlIOBase(f)->flags |= PERLIO_F_UTF8; | |
77 | return code; | |
78 | } | |
79 | ||
80 | IV | |
81 | PerlIOEncode_popped(PerlIO *f) | |
82 | { | |
83 | PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); | |
84 | dTHX; | |
85 | if (e->enc) | |
86 | { | |
87 | SvREFCNT_dec(e->enc); | |
88 | e->enc = Nullsv; | |
89 | } | |
90 | if (e->bufsv) | |
91 | { | |
92 | SvREFCNT_dec(e->bufsv); | |
93 | e->bufsv = Nullsv; | |
94 | } | |
95 | return 0; | |
96 | } | |
97 | ||
98 | STDCHAR * | |
99 | PerlIOEncode_get_base(PerlIO *f) | |
100 | { | |
101 | PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); | |
102 | dTHX; | |
103 | if (!e->base.bufsiz) | |
104 | e->base.bufsiz = 1024; | |
105 | if (!e->bufsv) | |
106 | { | |
107 | e->bufsv = newSV(e->base.bufsiz); | |
108 | sv_setpvn(e->bufsv,"",0); | |
109 | } | |
62e8870c | 110 | e->base.buf = (STDCHAR *)SvPVX(e->bufsv); |
33af2bc7 NIS |
111 | if (!e->base.ptr) |
112 | e->base.ptr = e->base.buf; | |
113 | if (!e->base.end) | |
114 | e->base.end = e->base.buf; | |
115 | if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv)) | |
116 | { | |
117 | Perl_warn(aTHX_ " ptr %p(%p)%p", | |
118 | e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv)); | |
119 | abort(); | |
120 | } | |
121 | if (SvLEN(e->bufsv) < e->base.bufsiz) | |
122 | { | |
123 | SSize_t poff = e->base.ptr - e->base.buf; | |
124 | SSize_t eoff = e->base.end - e->base.buf; | |
62e8870c | 125 | e->base.buf = (STDCHAR *)SvGROW(e->bufsv,e->base.bufsiz); |
33af2bc7 NIS |
126 | e->base.ptr = e->base.buf + poff; |
127 | e->base.end = e->base.buf + eoff; | |
128 | } | |
129 | if (e->base.ptr < e->base.buf || e->base.ptr > e->base.buf+SvLEN(e->bufsv)) | |
130 | { | |
131 | Perl_warn(aTHX_ " ptr %p(%p)%p", | |
132 | e->base.buf,e->base.ptr,e->base.buf+SvLEN(e->bufsv)); | |
133 | abort(); | |
134 | } | |
135 | return e->base.buf; | |
136 | } | |
137 | ||
33af2bc7 NIS |
138 | IV |
139 | PerlIOEncode_fill(PerlIO *f) | |
140 | { | |
141 | PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); | |
142 | dTHX; | |
143 | dSP; | |
144 | IV code; | |
33af2bc7 NIS |
145 | code = PerlIOBuf_fill(f); |
146 | if (code == 0) | |
147 | { | |
148 | SV *uni; | |
72e44f29 NIS |
149 | STRLEN len; |
150 | char *s; | |
151 | /* Set SV that is the buffer to be buf..ptr */ | |
33af2bc7 NIS |
152 | SvCUR_set(e->bufsv, e->base.end - e->base.buf); |
153 | SvUTF8_off(e->bufsv); | |
154 | ENTER; | |
155 | SAVETMPS; | |
156 | PUSHMARK(sp); | |
157 | XPUSHs(e->enc); | |
158 | XPUSHs(e->bufsv); | |
159 | XPUSHs(&PL_sv_yes); | |
160 | PUTBACK; | |
161 | if (perl_call_method("toUnicode",G_SCALAR) != 1) | |
162 | code = -1; | |
163 | SPAGAIN; | |
164 | uni = POPs; | |
165 | PUTBACK; | |
72e44f29 NIS |
166 | /* Now get translated string (forced to UTF-8) and copy back to buffer |
167 | don't use sv_setsv as that may "steal" PV from returned temp | |
168 | and so free() our known-large-enough buffer. | |
169 | sv_setpvn() should do but let us do it long hand. | |
170 | */ | |
171 | s = SvPVutf8(uni,len); | |
172 | if (s != SvPVX(e->bufsv)) | |
173 | { | |
62e8870c | 174 | e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len); |
72e44f29 NIS |
175 | Move(s,e->base.buf,len,char); |
176 | SvCUR_set(e->bufsv,len); | |
177 | } | |
178 | SvUTF8_on(e->bufsv); | |
179 | e->base.end = e->base.buf+len; | |
33af2bc7 NIS |
180 | e->base.ptr = e->base.buf; |
181 | FREETMPS; | |
182 | LEAVE; | |
183 | } | |
184 | return code; | |
185 | } | |
186 | ||
187 | IV | |
188 | PerlIOEncode_flush(PerlIO *f) | |
189 | { | |
190 | PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); | |
191 | IV code = 0; | |
192 | dTHX; | |
193 | if (e->bufsv && (PerlIOBase(f)->flags & (PERLIO_F_RDBUF|PERLIO_F_WRBUF))) | |
194 | { | |
195 | dSP; | |
196 | SV *str; | |
197 | char *s; | |
198 | STRLEN len; | |
72e44f29 NIS |
199 | SSize_t left = 0; |
200 | if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) | |
201 | { | |
202 | /* This is really just a flag to see if we took all the data, if | |
203 | we did PerlIOBase_flush avoids a seek to lower layer. | |
204 | Need to revisit if we start getting clever with unreads or seeks-in-buffer | |
205 | */ | |
206 | left = e->base.end - e->base.ptr; | |
207 | } | |
33af2bc7 NIS |
208 | ENTER; |
209 | SAVETMPS; | |
210 | PUSHMARK(sp); | |
211 | XPUSHs(e->enc); | |
72e44f29 | 212 | SvCUR_set(e->bufsv, e->base.ptr - e->base.buf); |
33af2bc7 NIS |
213 | SvUTF8_on(e->bufsv); |
214 | XPUSHs(e->bufsv); | |
215 | XPUSHs(&PL_sv_yes); | |
216 | PUTBACK; | |
217 | if (perl_call_method("fromUnicode",G_SCALAR) != 1) | |
218 | code = -1; | |
219 | SPAGAIN; | |
220 | str = POPs; | |
221 | PUTBACK; | |
72e44f29 NIS |
222 | s = SvPV(str,len); |
223 | if (s != SvPVX(e->bufsv)) | |
224 | { | |
62e8870c | 225 | e->base.buf = (STDCHAR *)SvGROW(e->bufsv,len); |
72e44f29 NIS |
226 | Move(s,e->base.buf,len,char); |
227 | SvCUR_set(e->bufsv,len); | |
228 | } | |
33af2bc7 | 229 | SvUTF8_off(e->bufsv); |
72e44f29 NIS |
230 | e->base.ptr = e->base.buf+len; |
231 | /* restore end != ptr as inequality is used by PerlIOBuf_flush in read case */ | |
232 | e->base.end = e->base.ptr + left; | |
33af2bc7 NIS |
233 | FREETMPS; |
234 | LEAVE; | |
235 | if (PerlIOBuf_flush(f) != 0) | |
236 | code = -1; | |
237 | } | |
238 | return code; | |
239 | } | |
240 | ||
241 | IV | |
242 | PerlIOEncode_close(PerlIO *f) | |
243 | { | |
244 | PerlIOEncode *e = PerlIOSelf(f,PerlIOEncode); | |
245 | IV code = PerlIOBase_close(f); | |
246 | dTHX; | |
247 | if (e->bufsv) | |
248 | { | |
249 | SvREFCNT_dec(e->bufsv); | |
250 | e->bufsv = Nullsv; | |
251 | } | |
252 | e->base.buf = NULL; | |
253 | e->base.ptr = NULL; | |
254 | e->base.end = NULL; | |
255 | PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF|PERLIO_F_WRBUF); | |
256 | return code; | |
257 | } | |
258 | ||
72e44f29 NIS |
259 | Off_t |
260 | PerlIOEncode_tell(PerlIO *f) | |
261 | { | |
262 | PerlIOBuf *b = PerlIOSelf(f,PerlIOBuf); | |
263 | /* Unfortunately the only way to get a postion is to back-translate, | |
264 | the UTF8-bytes we have buf..ptr and adjust accordingly. | |
265 | But we will try and save any unread data in case stream | |
266 | is un-seekable. | |
267 | */ | |
268 | if ((PerlIOBase(f)->flags & PERLIO_F_RDBUF) && b->ptr < b->end) | |
269 | { | |
270 | Size_t count = b->end - b->ptr; | |
271 | PerlIO_push(f,&PerlIO_pending,"r",Nullch,0); | |
272 | /* Save what we have left to read */ | |
273 | PerlIOSelf(f,PerlIOBuf)->bufsiz = count; | |
274 | PerlIO_unread(f,b->ptr,count); | |
275 | /* There isn't any unread data - we just saved it - so avoid the lower seek */ | |
276 | b->end = b->ptr; | |
277 | /* Flush ourselves - now one layer down, | |
278 | this does the back translate and adjusts position | |
279 | */ | |
280 | PerlIO_flush(PerlIONext(f)); | |
281 | /* Set position of the saved data */ | |
282 | PerlIOSelf(f,PerlIOBuf)->posn = b->posn; | |
283 | } | |
284 | else | |
285 | { | |
286 | PerlIO_flush(f); | |
287 | } | |
288 | return b->posn; | |
289 | } | |
290 | ||
33af2bc7 | 291 | PerlIO_funcs PerlIO_encode = { |
72e44f29 | 292 | "encoding", |
33af2bc7 NIS |
293 | sizeof(PerlIOEncode), |
294 | PERLIO_K_BUFFERED, | |
295 | PerlIOBase_fileno, | |
296 | PerlIOBuf_fdopen, | |
297 | PerlIOBuf_open, | |
298 | PerlIOBuf_reopen, | |
299 | PerlIOEncode_pushed, | |
300 | PerlIOEncode_popped, | |
301 | PerlIOBuf_read, | |
302 | PerlIOBuf_unread, | |
303 | PerlIOBuf_write, | |
304 | PerlIOBuf_seek, | |
72e44f29 | 305 | PerlIOEncode_tell, |
33af2bc7 NIS |
306 | PerlIOEncode_close, |
307 | PerlIOEncode_flush, | |
308 | PerlIOEncode_fill, | |
309 | PerlIOBase_eof, | |
310 | PerlIOBase_error, | |
311 | PerlIOBase_clearerr, | |
312 | PerlIOBuf_setlinebuf, | |
313 | PerlIOEncode_get_base, | |
314 | PerlIOBuf_bufsiz, | |
315 | PerlIOBuf_get_ptr, | |
316 | PerlIOBuf_get_cnt, | |
317 | PerlIOBuf_set_ptrcnt, | |
318 | }; | |
319 | #endif | |
320 | ||
2f2b4ff2 NIS |
321 | void |
322 | Encode_Define(pTHX_ encode_t *enc) | |
323 | { | |
324 | HV *hash = get_hv("Encode::encoding",GV_ADD|GV_ADDMULTI); | |
325 | HV *stash = gv_stashpv("Encode::XS", TRUE); | |
326 | SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash); | |
327 | hv_store(hash,enc->name,strlen(enc->name),sv,0); | |
328 | } | |
329 | ||
183a2d84 | 330 | void call_failure (SV *routine, U8* done, U8* dest, U8* orig) {} |
67e989fb | 331 | |
2f2b4ff2 NIS |
332 | static SV * |
333 | encode_method(pTHX_ encode_t *enc, encpage_t *dir, SV *src, int check) | |
334 | { | |
335 | STRLEN slen; | |
336 | U8 *s = (U8 *) SvPV(src,slen); | |
337 | SV *dst = sv_2mortal(newSV(2*slen+1)); | |
338 | if (slen) | |
339 | { | |
340 | U8 *d = (U8 *) SvGROW(dst, 2*slen+1); | |
341 | STRLEN dlen = SvLEN(dst); | |
342 | int code; | |
343 | while ((code = do_encode(dir,s,&slen,d,dlen,&dlen))) | |
344 | { | |
345 | SvCUR_set(dst,dlen); | |
346 | SvPOK_on(dst); | |
347 | switch(code) | |
348 | { | |
349 | case ENCODE_NOSPACE: | |
350 | { | |
351 | STRLEN need = (slen) ? (SvLEN(dst)*SvCUR(src)/slen) : (dlen + UTF8_MAXLEN); | |
352 | if (need <= SvLEN(dst)) | |
353 | need += UTF8_MAXLEN; | |
354 | d = (U8 *) SvGROW(dst, need); | |
355 | dlen = SvLEN(dst); | |
356 | slen = SvCUR(src); | |
357 | break; | |
358 | } | |
359 | ||
360 | case ENCODE_NOREP: | |
361 | if (dir == enc->f_utf8) | |
362 | { | |
363 | if (!check && ckWARN_d(WARN_UTF8)) | |
364 | { | |
365 | STRLEN clen; | |
366 | UV ch = utf8_to_uv(s+slen,(SvCUR(src)-slen),&clen,0); | |
367 | Perl_warner(aTHX_ WARN_UTF8, "\"\\x{%x}\" does not map to %s", ch, enc->name); | |
368 | /* FIXME: Skip over the character, copy in replacement and continue | |
369 | * but that is messy so for now just fail. | |
370 | */ | |
371 | return &PL_sv_undef; | |
372 | } | |
373 | else | |
374 | { | |
375 | return &PL_sv_undef; | |
376 | } | |
377 | } | |
378 | else | |
379 | { | |
380 | /* UTF-8 is supposed to be "Universal" so should not happen */ | |
381 | Perl_croak(aTHX_ "%s '%.*s' does not map to UTF-8", | |
382 | enc->name, (SvCUR(src)-slen),s+slen); | |
383 | } | |
384 | break; | |
385 | ||
386 | case ENCODE_PARTIAL: | |
387 | if (!check && ckWARN_d(WARN_UTF8)) | |
388 | { | |
389 | Perl_warner(aTHX_ WARN_UTF8, "Partial %s character", | |
390 | (dir == enc->f_utf8) ? "UTF-8" : enc->name); | |
391 | } | |
392 | return &PL_sv_undef; | |
393 | ||
394 | default: | |
395 | Perl_croak(aTHX_ "Unexpected code %d converting %s %s", | |
396 | code, (dir == enc->f_utf8) ? "to" : "from",enc->name); | |
397 | return &PL_sv_undef; | |
398 | } | |
399 | } | |
400 | SvCUR_set(dst,dlen); | |
401 | SvPOK_on(dst); | |
402 | if (check) | |
403 | { | |
404 | if (slen < SvCUR(src)) | |
405 | { | |
406 | Move(s+slen,s,SvCUR(src)-slen,U8); | |
407 | } | |
408 | SvCUR_set(src,SvCUR(src)-slen); | |
409 | } | |
410 | } | |
411 | return dst; | |
412 | } | |
413 | ||
414 | MODULE = Encode PACKAGE = Encode::XS PREFIX = Encode_ | |
415 | ||
416 | PROTOTYPES: ENABLE | |
417 | ||
418 | void | |
419 | Encode_toUnicode(obj,src,check = 0) | |
420 | SV * obj | |
421 | SV * src | |
422 | int check | |
423 | CODE: | |
424 | { | |
425 | encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); | |
426 | ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check); | |
427 | SvUTF8_on(ST(0)); | |
428 | XSRETURN(1); | |
429 | } | |
430 | ||
431 | void | |
432 | Encode_fromUnicode(obj,src,check = 0) | |
433 | SV * obj | |
434 | SV * src | |
435 | int check | |
436 | CODE: | |
437 | { | |
438 | encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); | |
439 | sv_utf8_upgrade(src); | |
440 | ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check); | |
441 | XSRETURN(1); | |
442 | } | |
443 | ||
67e989fb | 444 | MODULE = Encode PACKAGE = Encode |
2c674647 JH |
445 | |
446 | PROTOTYPES: ENABLE | |
447 | ||
67e989fb | 448 | I32 |
2c674647 | 449 | _bytes_to_utf8(sv, ...) |
67e989fb | 450 | SV * sv |
2c674647 | 451 | CODE: |
67e989fb JH |
452 | { |
453 | SV * encoding = items == 2 ? ST(1) : Nullsv; | |
454 | ||
455 | if (encoding) | |
456 | RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding)); | |
457 | else { | |
458 | STRLEN len; | |
183a2d84 | 459 | U8* s = (U8*)SvPV(sv, len); |
67e989fb JH |
460 | U8* converted; |
461 | ||
462 | converted = bytes_to_utf8(s, &len); /* This allocs */ | |
183a2d84 | 463 | sv_setpvn(sv, (char *)converted, len); |
67e989fb JH |
464 | SvUTF8_on(sv); /* XXX Should we? */ |
465 | Safefree(converted); /* ... so free it */ | |
466 | RETVAL = len; | |
467 | } | |
468 | } | |
2c674647 | 469 | OUTPUT: |
67e989fb | 470 | RETVAL |
2c674647 | 471 | |
67e989fb | 472 | I32 |
2c674647 | 473 | _utf8_to_bytes(sv, ...) |
67e989fb | 474 | SV * sv |
2c674647 | 475 | CODE: |
67e989fb JH |
476 | { |
477 | SV * to = items > 1 ? ST(1) : Nullsv; | |
478 | SV * check = items > 2 ? ST(2) : Nullsv; | |
87714904 | 479 | |
67e989fb JH |
480 | if (to) |
481 | RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to)); | |
482 | else { | |
67e989fb | 483 | STRLEN len; |
b113ac0e | 484 | U8 *s = (U8*)SvPV(sv, len); |
67e989fb JH |
485 | |
486 | if (SvTRUE(check)) { | |
487 | /* Must do things the slow way */ | |
488 | U8 *dest; | |
87714904 | 489 | U8 *src = (U8*)savepv((char *)s); /* We need a copy to pass to check() */ |
67e989fb JH |
490 | U8 *send = s + len; |
491 | ||
492 | New(83, dest, len, U8); /* I think */ | |
493 | ||
494 | while (s < send) { | |
495 | if (*s < 0x80) | |
496 | *dest++ = *s++; | |
497 | else { | |
b113ac0e JH |
498 | STRLEN ulen; |
499 | UV uv = *s++; | |
87714904 | 500 | |
67e989fb JH |
501 | /* Have to do it all ourselves because of error routine, |
502 | aargh. */ | |
503 | if (!(uv & 0x40)) | |
504 | goto failure; | |
505 | if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; } | |
506 | else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; } | |
507 | else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; } | |
508 | else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; } | |
509 | else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; } | |
510 | else if (!(uv & 0x01)) { ulen = 7; uv = 0; } | |
511 | else { ulen = 13; uv = 0; } | |
87714904 | 512 | |
67e989fb JH |
513 | /* Note change to utf8.c variable naming, for variety */ |
514 | while (ulen--) { | |
515 | if ((*s & 0xc0) != 0x80) | |
516 | goto failure; | |
87714904 | 517 | |
67e989fb JH |
518 | else |
519 | uv = (uv << 6) | (*s++ & 0x3f); | |
87714904 | 520 | } |
67e989fb JH |
521 | if (uv > 256) { |
522 | failure: | |
523 | call_failure(check, s, dest, src); | |
524 | /* Now what happens? */ | |
525 | } | |
526 | *dest++ = (U8)uv; | |
527 | } | |
528 | } | |
529 | } else | |
530 | RETVAL = (utf8_to_bytes(s, &len) ? len : 0); | |
531 | } | |
2c674647 JH |
532 | } |
533 | OUTPUT: | |
534 | RETVAL | |
535 | ||
536 | SV * | |
537 | _chars_to_utf8(sv, from, ...) | |
538 | SV * sv | |
539 | SV * from | |
540 | CODE: | |
541 | { | |
542 | SV * check = items == 3 ? ST(2) : Nullsv; | |
543 | RETVAL = &PL_sv_undef; | |
544 | } | |
545 | OUTPUT: | |
546 | RETVAL | |
547 | ||
548 | SV * | |
549 | _utf8_to_chars(sv, to, ...) | |
550 | SV * sv | |
551 | SV * to | |
552 | CODE: | |
553 | { | |
554 | SV * check = items == 3 ? ST(2) : Nullsv; | |
555 | RETVAL = &PL_sv_undef; | |
556 | } | |
557 | OUTPUT: | |
558 | RETVAL | |
559 | ||
560 | SV * | |
561 | _utf8_to_chars_check(sv, ...) | |
562 | SV * sv | |
563 | CODE: | |
564 | { | |
565 | SV * check = items == 2 ? ST(1) : Nullsv; | |
566 | RETVAL = &PL_sv_undef; | |
567 | } | |
568 | OUTPUT: | |
569 | RETVAL | |
570 | ||
571 | SV * | |
572 | _bytes_to_chars(sv, from, ...) | |
573 | SV * sv | |
574 | SV * from | |
575 | CODE: | |
576 | { | |
577 | SV * check = items == 3 ? ST(2) : Nullsv; | |
578 | RETVAL = &PL_sv_undef; | |
579 | } | |
580 | OUTPUT: | |
581 | RETVAL | |
582 | ||
583 | SV * | |
584 | _chars_to_bytes(sv, to, ...) | |
585 | SV * sv | |
586 | SV * to | |
587 | CODE: | |
588 | { | |
589 | SV * check = items == 3 ? ST(2) : Nullsv; | |
590 | RETVAL = &PL_sv_undef; | |
591 | } | |
592 | OUTPUT: | |
593 | RETVAL | |
594 | ||
595 | SV * | |
596 | _from_to(sv, from, to, ...) | |
597 | SV * sv | |
598 | SV * from | |
599 | SV * to | |
600 | CODE: | |
601 | { | |
602 | SV * check = items == 4 ? ST(3) : Nullsv; | |
603 | RETVAL = &PL_sv_undef; | |
604 | } | |
605 | OUTPUT: | |
606 | RETVAL | |
607 | ||
608 | bool | |
609 | _is_utf8(sv, ...) | |
610 | SV * sv | |
611 | CODE: | |
612 | { | |
613 | SV * check = items == 2 ? ST(1) : Nullsv; | |
614 | if (SvPOK(sv)) { | |
067a85ef | 615 | RETVAL = SvUTF8(sv) ? 1 : 0; |
2c674647 JH |
616 | if (RETVAL && |
617 | SvTRUE(check) && | |
618 | !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv))) | |
619 | RETVAL = FALSE; | |
620 | } else { | |
621 | RETVAL = FALSE; | |
622 | } | |
623 | } | |
624 | OUTPUT: | |
625 | RETVAL | |
626 | ||
627 | SV * | |
628 | _on_utf8(sv) | |
629 | SV * sv | |
630 | CODE: | |
631 | { | |
632 | if (SvPOK(sv)) { | |
87714904 | 633 | SV *rsv = newSViv(SvUTF8(sv)); |
2c674647 JH |
634 | RETVAL = rsv; |
635 | SvUTF8_on(sv); | |
636 | } else { | |
637 | RETVAL = &PL_sv_undef; | |
638 | } | |
639 | } | |
640 | OUTPUT: | |
641 | RETVAL | |
642 | ||
643 | SV * | |
644 | _off_utf8(sv) | |
645 | SV * sv | |
646 | CODE: | |
647 | { | |
648 | if (SvPOK(sv)) { | |
87714904 | 649 | SV *rsv = newSViv(SvUTF8(sv)); |
2c674647 JH |
650 | RETVAL = rsv; |
651 | SvUTF8_off(sv); | |
652 | } else { | |
653 | RETVAL = &PL_sv_undef; | |
654 | } | |
655 | } | |
656 | OUTPUT: | |
657 | RETVAL | |
658 | ||
659 | SV * | |
660 | _utf_to_utf(sv, from, to, ...) | |
661 | SV * sv | |
662 | SV * from | |
663 | SV * to | |
664 | CODE: | |
665 | { | |
666 | SV * check = items == 4 ? ST(3) : Nullsv; | |
667 | RETVAL = &PL_sv_undef; | |
668 | } | |
669 | OUTPUT: | |
670 | RETVAL | |
671 | ||
33af2bc7 NIS |
672 | BOOT: |
673 | { | |
674 | #ifdef USE_PERLIO | |
675 | PerlIO_define_layer(&PerlIO_encode); | |
676 | #endif | |
2f2b4ff2 NIS |
677 | #include "iso8859.def" |
678 | #include "EBCDIC.def" | |
679 | #include "Symbols.def" | |
33af2bc7 | 680 | } |