This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: [ID 20001023.003] PATCH perlfaq5 [perl-current]
[perl5.git] / ext / Encode / Encode.xs
CommitLineData
2c674647
JH
1#include "EXTERN.h"
2#include "perl.h"
3#include "XSUB.h"
4
67e989fb 5#define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) { \
2f5768b8 6 Perl_croak(aTHX_ "panic_unimplemented"); \
4a83738a 7 return (y)0; /* fool picky compilers */ \
87714904 8 }
67e989fb
JH
9UNIMPLEMENTED(_encoded_utf8_to_bytes, I32)
10UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
11
183a2d84 12void call_failure (SV *routine, U8* done, U8* dest, U8* orig) {}
67e989fb
JH
13
14MODULE = Encode PACKAGE = Encode
2c674647
JH
15
16PROTOTYPES: ENABLE
17
67e989fb 18I32
2c674647 19_bytes_to_utf8(sv, ...)
67e989fb 20 SV * sv
2c674647 21 CODE:
67e989fb
JH
22 {
23 SV * encoding = items == 2 ? ST(1) : Nullsv;
24
25 if (encoding)
26 RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
27 else {
28 STRLEN len;
183a2d84 29 U8* s = (U8*)SvPV(sv, len);
67e989fb
JH
30 U8* converted;
31
32 converted = bytes_to_utf8(s, &len); /* This allocs */
183a2d84 33 sv_setpvn(sv, (char *)converted, len);
67e989fb
JH
34 SvUTF8_on(sv); /* XXX Should we? */
35 Safefree(converted); /* ... so free it */
36 RETVAL = len;
37 }
38 }
2c674647 39 OUTPUT:
67e989fb 40 RETVAL
2c674647 41
67e989fb 42I32
2c674647 43_utf8_to_bytes(sv, ...)
67e989fb 44 SV * sv
2c674647 45 CODE:
67e989fb
JH
46 {
47 SV * to = items > 1 ? ST(1) : Nullsv;
48 SV * check = items > 2 ? ST(2) : Nullsv;
87714904 49
67e989fb
JH
50 if (to)
51 RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to));
52 else {
67e989fb 53 STRLEN len;
b113ac0e 54 U8 *s = (U8*)SvPV(sv, len);
67e989fb
JH
55
56 if (SvTRUE(check)) {
57 /* Must do things the slow way */
58 U8 *dest;
87714904 59 U8 *src = (U8*)savepv((char *)s); /* We need a copy to pass to check() */
67e989fb
JH
60 U8 *send = s + len;
61
62 New(83, dest, len, U8); /* I think */
63
64 while (s < send) {
65 if (*s < 0x80)
66 *dest++ = *s++;
67 else {
b113ac0e
JH
68 STRLEN ulen;
69 UV uv = *s++;
87714904 70
67e989fb
JH
71 /* Have to do it all ourselves because of error routine,
72 aargh. */
73 if (!(uv & 0x40))
74 goto failure;
75 if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; }
76 else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; }
77 else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; }
78 else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; }
79 else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; }
80 else if (!(uv & 0x01)) { ulen = 7; uv = 0; }
81 else { ulen = 13; uv = 0; }
87714904 82
67e989fb
JH
83 /* Note change to utf8.c variable naming, for variety */
84 while (ulen--) {
85 if ((*s & 0xc0) != 0x80)
86 goto failure;
87714904 87
67e989fb
JH
88 else
89 uv = (uv << 6) | (*s++ & 0x3f);
87714904 90 }
67e989fb
JH
91 if (uv > 256) {
92 failure:
93 call_failure(check, s, dest, src);
94 /* Now what happens? */
95 }
96 *dest++ = (U8)uv;
97 }
98 }
99 } else
100 RETVAL = (utf8_to_bytes(s, &len) ? len : 0);
101 }
2c674647
JH
102 }
103 OUTPUT:
104 RETVAL
105
106SV *
107_chars_to_utf8(sv, from, ...)
108 SV * sv
109 SV * from
110 CODE:
111 {
112 SV * check = items == 3 ? ST(2) : Nullsv;
113 RETVAL = &PL_sv_undef;
114 }
115 OUTPUT:
116 RETVAL
117
118SV *
119_utf8_to_chars(sv, to, ...)
120 SV * sv
121 SV * to
122 CODE:
123 {
124 SV * check = items == 3 ? ST(2) : Nullsv;
125 RETVAL = &PL_sv_undef;
126 }
127 OUTPUT:
128 RETVAL
129
130SV *
131_utf8_to_chars_check(sv, ...)
132 SV * sv
133 CODE:
134 {
135 SV * check = items == 2 ? ST(1) : Nullsv;
136 RETVAL = &PL_sv_undef;
137 }
138 OUTPUT:
139 RETVAL
140
141SV *
142_bytes_to_chars(sv, from, ...)
143 SV * sv
144 SV * from
145 CODE:
146 {
147 SV * check = items == 3 ? ST(2) : Nullsv;
148 RETVAL = &PL_sv_undef;
149 }
150 OUTPUT:
151 RETVAL
152
153SV *
154_chars_to_bytes(sv, to, ...)
155 SV * sv
156 SV * to
157 CODE:
158 {
159 SV * check = items == 3 ? ST(2) : Nullsv;
160 RETVAL = &PL_sv_undef;
161 }
162 OUTPUT:
163 RETVAL
164
165SV *
166_from_to(sv, from, to, ...)
167 SV * sv
168 SV * from
169 SV * to
170 CODE:
171 {
172 SV * check = items == 4 ? ST(3) : Nullsv;
173 RETVAL = &PL_sv_undef;
174 }
175 OUTPUT:
176 RETVAL
177
178bool
179_is_utf8(sv, ...)
180 SV * sv
181 CODE:
182 {
183 SV * check = items == 2 ? ST(1) : Nullsv;
184 if (SvPOK(sv)) {
185 RETVAL = SvUTF8(sv);
186 if (RETVAL &&
187 SvTRUE(check) &&
188 !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
189 RETVAL = FALSE;
190 } else {
191 RETVAL = FALSE;
192 }
193 }
194 OUTPUT:
195 RETVAL
196
197SV *
198_on_utf8(sv)
199 SV * sv
200 CODE:
201 {
202 if (SvPOK(sv)) {
87714904 203 SV *rsv = newSViv(SvUTF8(sv));
2c674647
JH
204 RETVAL = rsv;
205 SvUTF8_on(sv);
206 } else {
207 RETVAL = &PL_sv_undef;
208 }
209 }
210 OUTPUT:
211 RETVAL
212
213SV *
214_off_utf8(sv)
215 SV * sv
216 CODE:
217 {
218 if (SvPOK(sv)) {
87714904 219 SV *rsv = newSViv(SvUTF8(sv));
2c674647
JH
220 RETVAL = rsv;
221 SvUTF8_off(sv);
222 } else {
223 RETVAL = &PL_sv_undef;
224 }
225 }
226 OUTPUT:
227 RETVAL
228
229SV *
230_utf_to_utf(sv, from, to, ...)
231 SV * sv
232 SV * from
233 SV * to
234 CODE:
235 {
236 SV * check = items == 4 ? ST(3) : Nullsv;
237 RETVAL = &PL_sv_undef;
238 }
239 OUTPUT:
240 RETVAL
241