This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Update CPAN-Meta to CPAN version 2.110930
[perl5.git] / cpan / Unicode-Collate / Collate.xs
CommitLineData
f58b9ef1
CBW
1#include "EXTERN.h"
2#include "perl.h"
3#include "XSUB.h"
4
5/* This file is prepared by mkheader */
6#include "ucatbl.h"
7
8/* Perl 5.6.1 ? */
9#ifndef utf8n_to_uvuni
10#define utf8n_to_uvuni utf8_to_uv
11#endif /* utf8n_to_uvuni */
12
13/* UTF8_ALLOW_BOM is used before Perl 5.8.0 */
14#ifndef UTF8_ALLOW_BOM
15#define UTF8_ALLOW_BOM (0)
16#endif /* UTF8_ALLOW_BOM */
17
18#ifndef UTF8_ALLOW_SURROGATE
19#define UTF8_ALLOW_SURROGATE (0)
20#endif /* UTF8_ALLOW_SURROGATE */
21
22#ifndef UTF8_ALLOW_FE_FF
23#define UTF8_ALLOW_FE_FF (0)
24#endif /* UTF8_ALLOW_FE_FF */
25
26#ifndef UTF8_ALLOW_FFFF
27#define UTF8_ALLOW_FFFF (0)
28#endif /* UTF8_ALLOW_FFFF */
29
30#define AllowAnyUTF (UTF8_ALLOW_SURROGATE|UTF8_ALLOW_BOM|UTF8_ALLOW_FE_FF|UTF8_ALLOW_FFFF)
31
32/* if utf8n_to_uvuni() sets retlen to 0 (?) */
33#define ErrRetlenIsZero "panic (Unicode::Collate): zero-length character"
34
35/* At present, char > 0x10ffff are unaffected without complaint, right? */
36#define VALID_UTF_MAX (0x10ffff)
37#define OVER_UTF_MAX(uv) (VALID_UTF_MAX < (uv))
38
39static const UV max_div_16 = UV_MAX / 16;
40
41/* Supported Levels */
42#define MinLevel (1)
43#define MaxLevel (4)
44
45/* Shifted weight at 4th level */
46#define Shift4Wt (0xFFFF)
47
48#define VCE_Length (9)
49
50#define Hangul_SBase (0xAC00)
51#define Hangul_SIni (0xAC00)
52#define Hangul_SFin (0xD7A3)
53#define Hangul_NCount (588)
54#define Hangul_TCount (28)
55#define Hangul_LBase (0x1100)
56#define Hangul_LIni (0x1100)
57#define Hangul_LFin (0x1159)
58#define Hangul_LFill (0x115F)
59#define Hangul_LEnd (0x115F) /* Unicode 5.2 */
60#define Hangul_VBase (0x1161)
61#define Hangul_VIni (0x1160) /* from Vowel Filler */
62#define Hangul_VFin (0x11A2)
63#define Hangul_VEnd (0x11A7) /* Unicode 5.2 */
64#define Hangul_TBase (0x11A7) /* from "no-final" codepoint */
65#define Hangul_TIni (0x11A8)
66#define Hangul_TFin (0x11F9)
67#define Hangul_TEnd (0x11FF) /* Unicode 5.2 */
68#define HangulL2Ini (0xA960) /* Unicode 5.2 */
69#define HangulL2Fin (0xA97C) /* Unicode 5.2 */
70#define HangulV2Ini (0xD7B0) /* Unicode 5.2 */
71#define HangulV2Fin (0xD7C6) /* Unicode 5.2 */
72#define HangulT2Ini (0xD7CB) /* Unicode 5.2 */
73#define HangulT2Fin (0xD7FB) /* Unicode 5.2 */
74
75#define CJK_UidIni (0x4E00)
76#define CJK_UidFin (0x9FA5)
77#define CJK_UidF41 (0x9FBB)
78#define CJK_UidF51 (0x9FC3)
79#define CJK_UidF52 (0x9FCB)
80#define CJK_ExtAIni (0x3400) /* Unicode 3.0 */
81#define CJK_ExtAFin (0x4DB5) /* Unicode 3.0 */
82#define CJK_ExtBIni (0x20000) /* Unicode 3.1 */
83#define CJK_ExtBFin (0x2A6D6) /* Unicode 3.1 */
84#define CJK_ExtCIni (0x2A700) /* Unicode 5.2 */
85#define CJK_ExtCFin (0x2B734) /* Unicode 5.2 */
86#define CJK_ExtDIni (0x2B740) /* Unicode 6.0 */
87#define CJK_ExtDFin (0x2B81D) /* Unicode 6.0 */
88
89static STDCHAR UnifiedCompat[] = {
90 1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,1
91}; /* E F 0 1 2 3 4 5 6 7 8 9 A B C D E F 0 1 2 3 4 5 6 7 8 9 */
92
93#define codeRange(bcode, ecode) ((bcode) <= code && code <= (ecode))
94
95MODULE = Unicode::Collate PACKAGE = Unicode::Collate
96
97PROTOTYPES: DISABLE
98
99void
100_fetch_rest ()
101 PREINIT:
102 char ** rest;
103 PPCODE:
104 for (rest = UCA_rest; *rest; ++rest) {
105 XPUSHs(sv_2mortal(newSVpv((char *) *rest, 0)));
106 }
107
108
109void
110_fetch_simple (uv)
111 UV uv
112 PREINIT:
113 U8 ***plane, **row;
114 U8* result = NULL;
115 PPCODE:
116 if (!OVER_UTF_MAX(uv)){
117 plane = (U8***)UCA_simple[uv >> 16];
118 if (plane) {
119 row = plane[(uv >> 8) & 0xff];
120 result = row ? row[uv & 0xff] : NULL;
121 }
122 }
123 if (result) {
124 int i;
125 int num = (int)*result;
126 ++result;
127 for (i = 0; i < num; ++i) {
128 XPUSHs(sv_2mortal(newSVpvn((char *) result, VCE_Length)));
129 result += VCE_Length;
130 }
131 } else {
132 XPUSHs(sv_2mortal(newSViv(0)));
133 }
134
135SV*
136_ignorable_simple (uv)
137 UV uv
138 ALIAS:
139 _exists_simple = 1
140 PREINIT:
141 U8 ***plane, **row;
142 int num = -1;
143 U8* result = NULL;
144 CODE:
145 if (!OVER_UTF_MAX(uv)){
146 plane = (U8***)UCA_simple[uv >> 16];
147 if (plane) {
148 row = plane[(uv >> 8) & 0xff];
149 result = row ? row[uv & 0xff] : NULL;
150 }
151 if (result)
152 num = (int)*result; /* assuming 0 <= num < 128 */
153 }
154
155 if (ix)
156 RETVAL = boolSV(num >0);
157 else
158 RETVAL = boolSV(num==0);
159 OUTPUT:
160 RETVAL
161
162
163void
164_getHexArray (src)
165 SV* src
166 PREINIT:
167 char *s, *e;
168 STRLEN byte;
169 UV value;
170 bool overflowed = FALSE;
171 const char *hexdigit;
172 PPCODE:
173 s = SvPV(src,byte);
174 for (e = s + byte; s < e;) {
175 hexdigit = strchr((char *) PL_hexdigit, *s++);
176 if (! hexdigit)
177 continue;
178 value = (hexdigit - PL_hexdigit) & 0xF;
179 while (*s) {
180 hexdigit = strchr((char *) PL_hexdigit, *s++);
181 if (! hexdigit)
182 break;
183 if (overflowed)
184 continue;
185 if (value > max_div_16) {
186 overflowed = TRUE;
187 continue;
188 }
189 value = (value << 4) | ((hexdigit - PL_hexdigit) & 0xF);
190 }
191 XPUSHs(sv_2mortal(newSVuv(overflowed ? UV_MAX : value)));
192 }
193
194
195SV*
196_isIllegal (sv)
197 SV* sv
198 PREINIT:
199 UV uv;
200 CODE:
201 if (!sv || !SvIOK(sv))
202 XSRETURN_YES;
203 uv = SvUVX(sv);
204 RETVAL = boolSV(
205 0x10FFFF < uv /* out of range */
206 );
207OUTPUT:
208 RETVAL
209
210
211SV*
212_isNonchar (sv)
213 SV* sv
214 PREINIT:
215 UV uv;
216 CODE:
217 /* should be called only if ! _isIllegal(sv). */
218 uv = SvUVX(sv);
219 RETVAL = boolSV(
220 ((uv & 0xFFFE) == 0xFFFE) /* ??FFF[EF] (cf. utf8.c) */
221 || (0xD800 <= uv && uv <= 0xDFFF) /* unpaired surrogates */
222 || (0xFDD0 <= uv && uv <= 0xFDEF) /* other non-characters */
223 );
224OUTPUT:
225 RETVAL
226
227
228void
229_decompHangul (code)
230 UV code
231 PREINIT:
232 UV sindex, lindex, vindex, tindex;
233 PPCODE:
234 /* code *must* be in Hangul syllable.
235 * Check it before you enter here. */
236 sindex = code - Hangul_SBase;
237 lindex = sindex / Hangul_NCount;
238 vindex = (sindex % Hangul_NCount) / Hangul_TCount;
239 tindex = sindex % Hangul_TCount;
240
241 XPUSHs(sv_2mortal(newSVuv(lindex + Hangul_LBase)));
242 XPUSHs(sv_2mortal(newSVuv(vindex + Hangul_VBase)));
243 if (tindex)
244 XPUSHs(sv_2mortal(newSVuv(tindex + Hangul_TBase)));
245
246
247SV*
248getHST (code, uca_vers = 0)
249 UV code;
250 IV uca_vers;
251 PREINIT:
252 char * hangtype;
253 STRLEN typelen;
254 CODE:
255 if (codeRange(Hangul_SIni, Hangul_SFin)) {
256 if ((code - Hangul_SBase) % Hangul_TCount) {
257 hangtype = "LVT"; typelen = 3;
258 } else {
259 hangtype = "LV"; typelen = 2;
260 }
261 } else if (uca_vers < 20) {
262 if (codeRange(Hangul_LIni, Hangul_LFin) || code == Hangul_LFill) {
263 hangtype = "L"; typelen = 1;
264 } else if (codeRange(Hangul_VIni, Hangul_VFin)) {
265 hangtype = "V"; typelen = 1;
266 } else if (codeRange(Hangul_TIni, Hangul_TFin)) {
267 hangtype = "T"; typelen = 1;
268 } else {
269 hangtype = ""; typelen = 0;
270 }
271 } else {
272 if (codeRange(Hangul_LIni, Hangul_LEnd) ||
273 codeRange(HangulL2Ini, HangulL2Fin)) {
274 hangtype = "L"; typelen = 1;
275 } else if (codeRange(Hangul_VIni, Hangul_VEnd) ||
276 codeRange(HangulV2Ini, HangulV2Fin)) {
277 hangtype = "V"; typelen = 1;
278 } else if (codeRange(Hangul_TIni, Hangul_TEnd) ||
279 codeRange(HangulT2Ini, HangulT2Fin)) {
280 hangtype = "T"; typelen = 1;
281 } else {
282 hangtype = ""; typelen = 0;
283 }
284 }
285
286 RETVAL = newSVpvn(hangtype, typelen);
287OUTPUT:
288 RETVAL
289
290
291void
292_derivCE_9 (code)
293 UV code
294 ALIAS:
295 _derivCE_14 = 1
296 _derivCE_18 = 2
297 _derivCE_20 = 3
298 _derivCE_22 = 4
299 PREINIT:
300 UV base, aaaa, bbbb;
301 U8 a[VCE_Length + 1] = "\x00\xFF\xFF\x00\x20\x00\x02\xFF\xFF";
302 U8 b[VCE_Length + 1] = "\x00\xFF\xFF\x00\x00\x00\x00\xFF\xFF";
303 bool basic_unified = 0;
304 PPCODE:
305 if (CJK_UidIni <= code) {
306 if (codeRange(0xFA0E, 0xFA29))
307 basic_unified = (bool)UnifiedCompat[code - 0xFA0E];
308 else
309 basic_unified = (ix >= 3 ? (code <= CJK_UidF52) :
310 ix == 2 ? (code <= CJK_UidF51) :
311 ix == 1 ? (code <= CJK_UidF41) :
312 (code <= CJK_UidFin));
313 }
314 base = (basic_unified)
315 ? 0xFB40 : /* CJK */
316 ((codeRange(CJK_ExtAIni, CJK_ExtAFin))
317 ||
318 (codeRange(CJK_ExtBIni, CJK_ExtBFin))
319 ||
320 (ix >= 3 && codeRange(CJK_ExtCIni, CJK_ExtCFin))
321 ||
322 (ix >= 4 && codeRange(CJK_ExtDIni, CJK_ExtDFin)))
323 ? 0xFB80 /* CJK ext. */
324 : 0xFBC0; /* others */
325 aaaa = base + (code >> 15);
326 bbbb = (code & 0x7FFF) | 0x8000;
327 a[1] = (U8)(aaaa >> 8);
328 a[2] = (U8)(aaaa & 0xFF);
329 b[1] = (U8)(bbbb >> 8);
330 b[2] = (U8)(bbbb & 0xFF);
331 a[7] = b[7] = (U8)(code >> 8);
332 a[8] = b[8] = (U8)(code & 0xFF);
333 XPUSHs(sv_2mortal(newSVpvn((char *) a, VCE_Length)));
334 XPUSHs(sv_2mortal(newSVpvn((char *) b, VCE_Length)));
335
336
337void
338_derivCE_8 (code)
339 UV code
340 PREINIT:
341 UV aaaa, bbbb;
342 U8 a[VCE_Length + 1] = "\x00\xFF\xFF\x00\x02\x00\x01\xFF\xFF";
343 U8 b[VCE_Length + 1] = "\x00\xFF\xFF\x00\x00\x00\x00\xFF\xFF";
344 PPCODE:
345 aaaa = 0xFF80 + (code >> 15);
346 bbbb = (code & 0x7FFF) | 0x8000;
347 a[1] = (U8)(aaaa >> 8);
348 a[2] = (U8)(aaaa & 0xFF);
349 b[1] = (U8)(bbbb >> 8);
350 b[2] = (U8)(bbbb & 0xFF);
351 a[7] = b[7] = (U8)(code >> 8);
352 a[8] = b[8] = (U8)(code & 0xFF);
353 XPUSHs(sv_2mortal(newSVpvn((char *) a, VCE_Length)));
354 XPUSHs(sv_2mortal(newSVpvn((char *) b, VCE_Length)));
355
356
357void
358_uideoCE_8 (code)
359 UV code
360 PREINIT:
361 U8 uice[VCE_Length + 1] = "\x00\xFF\xFF\x00\x20\x00\x02\xFF\xFF";
362 PPCODE:
363 uice[1] = uice[7] = (U8)(code >> 8);
364 uice[2] = uice[8] = (U8)(code & 0xFF);
365 XPUSHs(sv_2mortal(newSVpvn((char *) uice, VCE_Length)));
366
367
368SV*
369_isUIdeo (code, uca_vers)
370 UV code;
371 IV uca_vers;
372 bool basic_unified = 0;
373 CODE:
374 /* uca_vers = 0 for _uideoCE_8() */
375 if (CJK_UidIni <= code) {
376 if (codeRange(0xFA0E, 0xFA29))
377 basic_unified = (bool)UnifiedCompat[code - 0xFA0E];
378 else
379 basic_unified = (uca_vers >= 20 ? (code <= CJK_UidF52) :
380 uca_vers >= 18 ? (code <= CJK_UidF51) :
381 uca_vers >= 14 ? (code <= CJK_UidF41) :
382 (code <= CJK_UidFin));
383 }
384 RETVAL = boolSV(
385 (basic_unified)
386 ||
387 (codeRange(CJK_ExtAIni, CJK_ExtAFin))
388 ||
389 (uca_vers >= 8 && codeRange(CJK_ExtBIni, CJK_ExtBFin))
390 ||
391 (uca_vers >= 20 && codeRange(CJK_ExtCIni, CJK_ExtCFin))
392 ||
393 (uca_vers >= 22 && codeRange(CJK_ExtDIni, CJK_ExtDFin))
394 );
395OUTPUT:
396 RETVAL
397
398
399SV*
400mk_SortKey (self, buf)
401 SV* self;
402 SV* buf;
403 PREINIT:
404 SV *dst, **svp;
405 STRLEN dlen, vlen;
406 U8 *d, *p, *e, *v, *s[MaxLevel], *eachlevel[MaxLevel];
407 AV *bufAV;
408 HV *selfHV;
409 UV back_flag;
410 I32 i, buf_len;
411 IV lv, level, uca_vers;
412 bool upper_lower, kata_hira, v2i, last_is_var;
413 CODE:
414 if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV)
415 selfHV = (HV*)SvRV(self);
416 else
417 croak("$self is not a HASHREF.");
418
419 svp = hv_fetch(selfHV, "level", 5, FALSE);
420 level = svp ? SvIV(*svp) : MaxLevel;
421
422 if (SvROK(buf) && SvTYPE(SvRV(buf)) == SVt_PVAV)
423 bufAV = (AV*)SvRV(buf);
424 else
425 croak("XSUB, not an ARRAYREF.");
426
427 buf_len = av_len(bufAV);
428
429 if (buf_len < 0) { /* empty: -1 */
430 dlen = 2 * (MaxLevel - 1);
431 dst = newSV(dlen);
432 (void)SvPOK_only(dst);
433 d = (U8*)SvPVX(dst);
434 while (dlen--)
435 *d++ = '\0';
436 }
437 else {
438 for (lv = 0; lv < level; lv++) {
439 New(0, eachlevel[lv], 2 * (1 + buf_len) + 1, U8);
440 s[lv] = eachlevel[lv];
441 }
442
443 svp = hv_fetch(selfHV, "upper_before_lower", 18, FALSE);
444 upper_lower = svp ? SvTRUE(*svp) : FALSE;
445 svp = hv_fetch(selfHV, "katakana_before_hiragana", 24, FALSE);
446 kata_hira = svp ? SvTRUE(*svp) : FALSE;
447 svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE);
448 uca_vers = SvIV(*svp);
449 svp = hv_fetch(selfHV, "variable", 8, FALSE);
450 v2i = uca_vers >= 9 && svp /* (vers >= 9) and not (non-ignorable) */
451 ? !(SvCUR(*svp) == 13 && memEQ(SvPVX(*svp), "non-ignorable", 13))
452 : FALSE;
453
454 last_is_var = FALSE;
455 for (i = 0; i <= buf_len; i++) {
456 svp = av_fetch(bufAV, i, FALSE);
457
458 if (svp && SvPOK(*svp))
459 v = (U8*)SvPV(*svp, vlen);
460 else
461 croak("not a vwt.");
462
463 if (vlen < VCE_Length) /* ignore short VCE (unexpected) */
464 continue;
465
466 /* "Ignorable (L1, L2) after Variable" since track. v. 9 */
467 if (v2i) {
468 if (*v)
469 last_is_var = TRUE;
470 else if (v[1] || v[2]) /* non zero primary weight */
471 last_is_var = FALSE;
472 else if (last_is_var) /* zero primary weight; skipped */
473 continue;
474 }
475
476 if (v[5] == 0) { /* tert wt < 256 */
477 if (upper_lower) {
478 if (0x8 <= v[6] && v[6] <= 0xC) /* lower */
479 v[6] -= 6;
480 else if (0x2 <= v[6] && v[6] <= 0x6) /* upper */
481 v[6] += 6;
482 else if (v[6] == 0x1C) /* square upper */
483 v[6]++;
484 else if (v[6] == 0x1D) /* square lower */
485 v[6]--;
486 }
487 if (kata_hira) {
488 if (0x0F <= v[6] && v[6] <= 0x13) /* katakana */
489 v[6] -= 2;
490 else if (0xD <= v[6] && v[6] <= 0xE) /* hiragana */
491 v[6] += 5;
492 }
493 }
494
495 for (lv = 0; lv < level; lv++) {
496 if (v[2 * lv + 1] || v[2 * lv + 2]) {
497 *s[lv]++ = v[2 * lv + 1];
498 *s[lv]++ = v[2 * lv + 2];
499 }
500 }
501 }
502
503 dlen = 2 * (MaxLevel - 1);
504 for (lv = 0; lv < level; lv++)
505 dlen += s[lv] - eachlevel[lv];
506
507 dst = newSV(dlen);
508 (void)SvPOK_only(dst);
509 d = (U8*)SvPVX(dst);
510
511 svp = hv_fetch(selfHV, "backwardsFlag", 13, FALSE);
512 back_flag = svp ? SvUV(*svp) : (UV)0;
513
514 for (lv = 0; lv < level; lv++) {
515 if (back_flag & (1 << (lv + 1))) {
516 p = s[lv];
517 e = eachlevel[lv];
518 for ( ; e < p; p -= 2) {
519 *d++ = p[-2];
520 *d++ = p[-1];
521 }
522 }
523 else {
524 p = eachlevel[lv];
525 e = s[lv];
526 while (p < e)
527 *d++ = *p++;
528 }
529 if (lv + 1 < MaxLevel) { /* lv + 1 == real level */
530 *d++ = '\0';
531 *d++ = '\0';
532 }
533 }
534
535 for (lv = level; lv < MaxLevel; lv++) {
536 if (lv + 1 < MaxLevel) { /* lv + 1 == real level */
537 *d++ = '\0';
538 *d++ = '\0';
539 }
540 }
541
542 for (lv = 0; lv < level; lv++) {
543 Safefree(eachlevel[lv]);
544 }
545 }
546 *d = '\0';
547 SvCUR_set(dst, d - (U8*)SvPVX(dst));
548 RETVAL = dst;
549OUTPUT:
550 RETVAL
551
552
553SV*
554_varCE (vbl, vce)
555 SV* vbl
556 SV* vce
557 PREINIT:
558 SV *dst;
559 U8 *a, *v, *d;
560 STRLEN alen, vlen;
561 CODE:
562 a = (U8*)SvPV(vbl, alen);
563 v = (U8*)SvPV(vce, vlen);
564
565 dst = newSV(vlen);
566 d = (U8*)SvPVX(dst);
567 (void)SvPOK_only(dst);
568 Copy(v, d, vlen, U8);
569 SvCUR_set(dst, vlen);
570 d[vlen] = '\0';
571
572 /* variable: checked only the first char and the length,
573 trusting checkCollator() and %VariableOK in Perl ... */
574
575 if (vlen < VCE_Length /* ignore short VCE (unexpected) */
576 ||
577 *a == 'n') /* 'non-ignorable' */
578 1;
579 else if (*v) {
580 if (*a == 's') { /* shifted or shift-trimmed */
581 d[7] = d[1]; /* wt level 1 to 4 */
582 d[8] = d[2];
583 }
584 d[1] = d[2] = d[3] = d[4] = d[5] = d[6] = '\0';
585 }
586 else if (*a == 'b') /* blanked */
587 1;
588 else if (*a == 's') { /* shifted or shift-trimmed */
589 if (alen == 7 && (d[1] + d[2] + d[3] + d[4] + d[5] + d[6])) {
590 d[7] = (U8)(Shift4Wt >> 8);
591 d[8] = (U8)(Shift4Wt & 0xFF);
592 }
593 else {
594 d[7] = d[8] = 0;
595 }
596 }
597 else
598 croak("unknown variable value '%s'", a);
599 RETVAL = dst;
600OUTPUT:
601 RETVAL
602
603
604
605SV*
606visualizeSortKey (self, key)
607 SV * self
608 SV * key
609 PREINIT:
610 HV *selfHV;
611 SV **svp, *dst;
612 U8 *s, *e, *d;
613 STRLEN klen, dlen;
614 UV uv;
615 IV uca_vers;
616 static char *upperhex = "0123456789ABCDEF";
617 CODE:
618 if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV)
619 selfHV = (HV*)SvRV(self);
620 else
621 croak("$self is not a HASHREF.");
622
623 svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE);
624 if (!svp)
625 croak("Panic: no $self->{UCA_Version} in visualizeSortKey");
626 uca_vers = SvIV(*svp);
627
628 s = (U8*)SvPV(key, klen);
629
630 /* slightly *longer* than the need, but I'm afraid of miscounting;
631 exactly: (klen / 2) * 5 + MaxLevel * 2 - 1 (excluding '\0')
632 = (klen / 2) * 5 - 1 # FFFF (16bit) and ' ' between 16bit units
633 + (MaxLevel - 1) * 2 # ' ' and '|' for level boundaries
634 + 2 # '[' and ']'
635 */
636 dlen = (klen / 2) * 5 + MaxLevel * 2 + 2;
637 dst = newSV(dlen);
638 (void)SvPOK_only(dst);
639 d = (U8*)SvPVX(dst);
640
641 *d++ = '[';
642 for (e = s + klen; s < e; s += 2) {
643 uv = (U16)(*s << 8 | s[1]);
644 if (uv) {
645 if ((d[-1] != '[') && ((9 <= uca_vers) || (d[-1] != '|')))
646 *d++ = ' ';
647 *d++ = upperhex[ (s[0] >> 4) & 0xF ];
648 *d++ = upperhex[ s[0] & 0xF ];
649 *d++ = upperhex[ (s[1] >> 4) & 0xF ];
650 *d++ = upperhex[ s[1] & 0xF ];
651 }
652 else {
653 if ((9 <= uca_vers) && (d[-1] != '['))
654 *d++ = ' ';
655 *d++ = '|';
656 }
657 }
658 *d++ = ']';
659 *d = '\0';
660 SvCUR_set(dst, d - (U8*)SvPVX(dst));
661 RETVAL = dst;
662OUTPUT:
663 RETVAL
664
665
666
667void
668unpack_U (src)
669 SV* src
670 PREINIT:
671 STRLEN srclen, retlen;
672 U8 *s, *p, *e;
673 UV uv;
674 PPCODE:
675 s = (U8*)SvPV(src,srclen);
676 if (!SvUTF8(src)) {
677 SV* tmpsv = sv_mortalcopy(src);
678 if (!SvPOK(tmpsv))
679 (void)sv_pvn_force(tmpsv,&srclen);
680 sv_utf8_upgrade(tmpsv);
681 s = (U8*)SvPV(tmpsv,srclen);
682 }
683 e = s + srclen;
684
685 for (p = s; p < e; p += retlen) {
686 uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
687 if (!retlen)
688 croak(ErrRetlenIsZero);
689 XPUSHs(sv_2mortal(newSVuv(uv)));
690 }
691