Commit | Line | Data |
---|---|---|
2c674647 JH |
1 | #include "EXTERN.h" |
2 | #include "perl.h" | |
3 | #include "XSUB.h" | |
4 | ||
67e989fb | 5 | #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) { \ |
2f5768b8 | 6 | Perl_croak(aTHX_ "panic_unimplemented"); \ |
4a83738a | 7 | return (y)0; /* fool picky compilers */ \ |
87714904 | 8 | } |
67e989fb JH |
9 | UNIMPLEMENTED(_encoded_utf8_to_bytes, I32) |
10 | UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) | |
11 | ||
183a2d84 | 12 | void call_failure (SV *routine, U8* done, U8* dest, U8* orig) {} |
67e989fb JH |
13 | |
14 | MODULE = Encode PACKAGE = Encode | |
2c674647 JH |
15 | |
16 | PROTOTYPES: ENABLE | |
17 | ||
67e989fb | 18 | I32 |
2c674647 | 19 | _bytes_to_utf8(sv, ...) |
67e989fb | 20 | SV * sv |
2c674647 | 21 | CODE: |
67e989fb JH |
22 | { |
23 | SV * encoding = items == 2 ? ST(1) : Nullsv; | |
24 | ||
25 | if (encoding) | |
26 | RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding)); | |
27 | else { | |
28 | STRLEN len; | |
183a2d84 | 29 | U8* s = (U8*)SvPV(sv, len); |
67e989fb JH |
30 | U8* converted; |
31 | ||
32 | converted = bytes_to_utf8(s, &len); /* This allocs */ | |
183a2d84 | 33 | sv_setpvn(sv, (char *)converted, len); |
67e989fb JH |
34 | SvUTF8_on(sv); /* XXX Should we? */ |
35 | Safefree(converted); /* ... so free it */ | |
36 | RETVAL = len; | |
37 | } | |
38 | } | |
2c674647 | 39 | OUTPUT: |
67e989fb | 40 | RETVAL |
2c674647 | 41 | |
67e989fb | 42 | I32 |
2c674647 | 43 | _utf8_to_bytes(sv, ...) |
67e989fb | 44 | SV * sv |
2c674647 | 45 | CODE: |
67e989fb JH |
46 | { |
47 | SV * to = items > 1 ? ST(1) : Nullsv; | |
48 | SV * check = items > 2 ? ST(2) : Nullsv; | |
87714904 | 49 | |
67e989fb JH |
50 | if (to) |
51 | RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to)); | |
52 | else { | |
67e989fb | 53 | STRLEN len; |
b113ac0e | 54 | U8 *s = (U8*)SvPV(sv, len); |
67e989fb JH |
55 | |
56 | if (SvTRUE(check)) { | |
57 | /* Must do things the slow way */ | |
58 | U8 *dest; | |
87714904 | 59 | U8 *src = (U8*)savepv((char *)s); /* We need a copy to pass to check() */ |
67e989fb JH |
60 | U8 *send = s + len; |
61 | ||
62 | New(83, dest, len, U8); /* I think */ | |
63 | ||
64 | while (s < send) { | |
65 | if (*s < 0x80) | |
66 | *dest++ = *s++; | |
67 | else { | |
b113ac0e JH |
68 | STRLEN ulen; |
69 | UV uv = *s++; | |
87714904 | 70 | |
67e989fb JH |
71 | /* Have to do it all ourselves because of error routine, |
72 | aargh. */ | |
73 | if (!(uv & 0x40)) | |
74 | goto failure; | |
75 | if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; } | |
76 | else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; } | |
77 | else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; } | |
78 | else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; } | |
79 | else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; } | |
80 | else if (!(uv & 0x01)) { ulen = 7; uv = 0; } | |
81 | else { ulen = 13; uv = 0; } | |
87714904 | 82 | |
67e989fb JH |
83 | /* Note change to utf8.c variable naming, for variety */ |
84 | while (ulen--) { | |
85 | if ((*s & 0xc0) != 0x80) | |
86 | goto failure; | |
87714904 | 87 | |
67e989fb JH |
88 | else |
89 | uv = (uv << 6) | (*s++ & 0x3f); | |
87714904 | 90 | } |
67e989fb JH |
91 | if (uv > 256) { |
92 | failure: | |
93 | call_failure(check, s, dest, src); | |
94 | /* Now what happens? */ | |
95 | } | |
96 | *dest++ = (U8)uv; | |
97 | } | |
98 | } | |
99 | } else | |
100 | RETVAL = (utf8_to_bytes(s, &len) ? len : 0); | |
101 | } | |
2c674647 JH |
102 | } |
103 | OUTPUT: | |
104 | RETVAL | |
105 | ||
106 | SV * | |
107 | _chars_to_utf8(sv, from, ...) | |
108 | SV * sv | |
109 | SV * from | |
110 | CODE: | |
111 | { | |
112 | SV * check = items == 3 ? ST(2) : Nullsv; | |
113 | RETVAL = &PL_sv_undef; | |
114 | } | |
115 | OUTPUT: | |
116 | RETVAL | |
117 | ||
118 | SV * | |
119 | _utf8_to_chars(sv, to, ...) | |
120 | SV * sv | |
121 | SV * to | |
122 | CODE: | |
123 | { | |
124 | SV * check = items == 3 ? ST(2) : Nullsv; | |
125 | RETVAL = &PL_sv_undef; | |
126 | } | |
127 | OUTPUT: | |
128 | RETVAL | |
129 | ||
130 | SV * | |
131 | _utf8_to_chars_check(sv, ...) | |
132 | SV * sv | |
133 | CODE: | |
134 | { | |
135 | SV * check = items == 2 ? ST(1) : Nullsv; | |
136 | RETVAL = &PL_sv_undef; | |
137 | } | |
138 | OUTPUT: | |
139 | RETVAL | |
140 | ||
141 | SV * | |
142 | _bytes_to_chars(sv, from, ...) | |
143 | SV * sv | |
144 | SV * from | |
145 | CODE: | |
146 | { | |
147 | SV * check = items == 3 ? ST(2) : Nullsv; | |
148 | RETVAL = &PL_sv_undef; | |
149 | } | |
150 | OUTPUT: | |
151 | RETVAL | |
152 | ||
153 | SV * | |
154 | _chars_to_bytes(sv, to, ...) | |
155 | SV * sv | |
156 | SV * to | |
157 | CODE: | |
158 | { | |
159 | SV * check = items == 3 ? ST(2) : Nullsv; | |
160 | RETVAL = &PL_sv_undef; | |
161 | } | |
162 | OUTPUT: | |
163 | RETVAL | |
164 | ||
165 | SV * | |
166 | _from_to(sv, from, to, ...) | |
167 | SV * sv | |
168 | SV * from | |
169 | SV * to | |
170 | CODE: | |
171 | { | |
172 | SV * check = items == 4 ? ST(3) : Nullsv; | |
173 | RETVAL = &PL_sv_undef; | |
174 | } | |
175 | OUTPUT: | |
176 | RETVAL | |
177 | ||
178 | bool | |
179 | _is_utf8(sv, ...) | |
180 | SV * sv | |
181 | CODE: | |
182 | { | |
183 | SV * check = items == 2 ? ST(1) : Nullsv; | |
184 | if (SvPOK(sv)) { | |
185 | RETVAL = SvUTF8(sv); | |
186 | if (RETVAL && | |
187 | SvTRUE(check) && | |
188 | !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv))) | |
189 | RETVAL = FALSE; | |
190 | } else { | |
191 | RETVAL = FALSE; | |
192 | } | |
193 | } | |
194 | OUTPUT: | |
195 | RETVAL | |
196 | ||
197 | SV * | |
198 | _on_utf8(sv) | |
199 | SV * sv | |
200 | CODE: | |
201 | { | |
202 | if (SvPOK(sv)) { | |
87714904 | 203 | SV *rsv = newSViv(SvUTF8(sv)); |
2c674647 JH |
204 | RETVAL = rsv; |
205 | SvUTF8_on(sv); | |
206 | } else { | |
207 | RETVAL = &PL_sv_undef; | |
208 | } | |
209 | } | |
210 | OUTPUT: | |
211 | RETVAL | |
212 | ||
213 | SV * | |
214 | _off_utf8(sv) | |
215 | SV * sv | |
216 | CODE: | |
217 | { | |
218 | if (SvPOK(sv)) { | |
87714904 | 219 | SV *rsv = newSViv(SvUTF8(sv)); |
2c674647 JH |
220 | RETVAL = rsv; |
221 | SvUTF8_off(sv); | |
222 | } else { | |
223 | RETVAL = &PL_sv_undef; | |
224 | } | |
225 | } | |
226 | OUTPUT: | |
227 | RETVAL | |
228 | ||
229 | SV * | |
230 | _utf_to_utf(sv, from, to, ...) | |
231 | SV * sv | |
232 | SV * from | |
233 | SV * to | |
234 | CODE: | |
235 | { | |
236 | SV * check = items == 4 ? ST(3) : Nullsv; | |
237 | RETVAL = &PL_sv_undef; | |
238 | } | |
239 | OUTPUT: | |
240 | RETVAL | |
241 |