This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Update Module-Loaded to CPAN version 0.08
[perl5.git] / cpan / Unicode-Collate / Collate.xs
CommitLineData
f58b9ef1
CBW
1#include "EXTERN.h"
2#include "perl.h"
3#include "XSUB.h"
4
5/* This file is prepared by mkheader */
6#include "ucatbl.h"
7
8/* Perl 5.6.1 ? */
9#ifndef utf8n_to_uvuni
10#define utf8n_to_uvuni utf8_to_uv
11#endif /* utf8n_to_uvuni */
12
13/* UTF8_ALLOW_BOM is used before Perl 5.8.0 */
14#ifndef UTF8_ALLOW_BOM
15#define UTF8_ALLOW_BOM (0)
16#endif /* UTF8_ALLOW_BOM */
17
18#ifndef UTF8_ALLOW_SURROGATE
19#define UTF8_ALLOW_SURROGATE (0)
20#endif /* UTF8_ALLOW_SURROGATE */
21
22#ifndef UTF8_ALLOW_FE_FF
23#define UTF8_ALLOW_FE_FF (0)
24#endif /* UTF8_ALLOW_FE_FF */
25
26#ifndef UTF8_ALLOW_FFFF
27#define UTF8_ALLOW_FFFF (0)
28#endif /* UTF8_ALLOW_FFFF */
29
30#define AllowAnyUTF (UTF8_ALLOW_SURROGATE|UTF8_ALLOW_BOM|UTF8_ALLOW_FE_FF|UTF8_ALLOW_FFFF)
31
32/* if utf8n_to_uvuni() sets retlen to 0 (?) */
33#define ErrRetlenIsZero "panic (Unicode::Collate): zero-length character"
34
35/* At present, char > 0x10ffff are unaffected without complaint, right? */
36#define VALID_UTF_MAX (0x10ffff)
37#define OVER_UTF_MAX(uv) (VALID_UTF_MAX < (uv))
38
39static const UV max_div_16 = UV_MAX / 16;
40
41/* Supported Levels */
42#define MinLevel (1)
43#define MaxLevel (4)
44
45/* Shifted weight at 4th level */
46#define Shift4Wt (0xFFFF)
47
48#define VCE_Length (9)
49
50#define Hangul_SBase (0xAC00)
51#define Hangul_SIni (0xAC00)
52#define Hangul_SFin (0xD7A3)
53#define Hangul_NCount (588)
54#define Hangul_TCount (28)
55#define Hangul_LBase (0x1100)
56#define Hangul_LIni (0x1100)
57#define Hangul_LFin (0x1159)
58#define Hangul_LFill (0x115F)
59#define Hangul_LEnd (0x115F) /* Unicode 5.2 */
60#define Hangul_VBase (0x1161)
61#define Hangul_VIni (0x1160) /* from Vowel Filler */
62#define Hangul_VFin (0x11A2)
63#define Hangul_VEnd (0x11A7) /* Unicode 5.2 */
64#define Hangul_TBase (0x11A7) /* from "no-final" codepoint */
65#define Hangul_TIni (0x11A8)
66#define Hangul_TFin (0x11F9)
67#define Hangul_TEnd (0x11FF) /* Unicode 5.2 */
68#define HangulL2Ini (0xA960) /* Unicode 5.2 */
69#define HangulL2Fin (0xA97C) /* Unicode 5.2 */
70#define HangulV2Ini (0xD7B0) /* Unicode 5.2 */
71#define HangulV2Fin (0xD7C6) /* Unicode 5.2 */
72#define HangulT2Ini (0xD7CB) /* Unicode 5.2 */
73#define HangulT2Fin (0xD7FB) /* Unicode 5.2 */
74
75#define CJK_UidIni (0x4E00)
76#define CJK_UidFin (0x9FA5)
77#define CJK_UidF41 (0x9FBB)
78#define CJK_UidF51 (0x9FC3)
79#define CJK_UidF52 (0x9FCB)
80#define CJK_ExtAIni (0x3400) /* Unicode 3.0 */
81#define CJK_ExtAFin (0x4DB5) /* Unicode 3.0 */
82#define CJK_ExtBIni (0x20000) /* Unicode 3.1 */
83#define CJK_ExtBFin (0x2A6D6) /* Unicode 3.1 */
84#define CJK_ExtCIni (0x2A700) /* Unicode 5.2 */
85#define CJK_ExtCFin (0x2B734) /* Unicode 5.2 */
86#define CJK_ExtDIni (0x2B740) /* Unicode 6.0 */
87#define CJK_ExtDFin (0x2B81D) /* Unicode 6.0 */
88
46267efc
CBW
89#define CJK_CompIni (0xFA0E)
90#define CJK_CompFin (0xFA29)
f58b9ef1
CBW
91static STDCHAR UnifiedCompat[] = {
92 1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,1
93}; /* E F 0 1 2 3 4 5 6 7 8 9 A B C D E F 0 1 2 3 4 5 6 7 8 9 */
94
95#define codeRange(bcode, ecode) ((bcode) <= code && code <= (ecode))
96
97MODULE = Unicode::Collate PACKAGE = Unicode::Collate
98
99PROTOTYPES: DISABLE
100
101void
102_fetch_rest ()
103 PREINIT:
104 char ** rest;
105 PPCODE:
106 for (rest = UCA_rest; *rest; ++rest) {
107 XPUSHs(sv_2mortal(newSVpv((char *) *rest, 0)));
108 }
109
110
111void
112_fetch_simple (uv)
113 UV uv
114 PREINIT:
115 U8 ***plane, **row;
116 U8* result = NULL;
117 PPCODE:
118 if (!OVER_UTF_MAX(uv)){
119 plane = (U8***)UCA_simple[uv >> 16];
120 if (plane) {
121 row = plane[(uv >> 8) & 0xff];
122 result = row ? row[uv & 0xff] : NULL;
123 }
124 }
125 if (result) {
126 int i;
127 int num = (int)*result;
128 ++result;
129 for (i = 0; i < num; ++i) {
130 XPUSHs(sv_2mortal(newSVpvn((char *) result, VCE_Length)));
131 result += VCE_Length;
132 }
133 } else {
134 XPUSHs(sv_2mortal(newSViv(0)));
135 }
136
137SV*
138_ignorable_simple (uv)
139 UV uv
140 ALIAS:
141 _exists_simple = 1
142 PREINIT:
143 U8 ***plane, **row;
144 int num = -1;
145 U8* result = NULL;
146 CODE:
147 if (!OVER_UTF_MAX(uv)){
148 plane = (U8***)UCA_simple[uv >> 16];
149 if (plane) {
150 row = plane[(uv >> 8) & 0xff];
151 result = row ? row[uv & 0xff] : NULL;
152 }
153 if (result)
154 num = (int)*result; /* assuming 0 <= num < 128 */
155 }
156
157 if (ix)
158 RETVAL = boolSV(num >0);
159 else
160 RETVAL = boolSV(num==0);
161 OUTPUT:
162 RETVAL
163
164
165void
166_getHexArray (src)
167 SV* src
168 PREINIT:
169 char *s, *e;
170 STRLEN byte;
171 UV value;
172 bool overflowed = FALSE;
173 const char *hexdigit;
174 PPCODE:
175 s = SvPV(src,byte);
176 for (e = s + byte; s < e;) {
177 hexdigit = strchr((char *) PL_hexdigit, *s++);
178 if (! hexdigit)
179 continue;
180 value = (hexdigit - PL_hexdigit) & 0xF;
181 while (*s) {
182 hexdigit = strchr((char *) PL_hexdigit, *s++);
183 if (! hexdigit)
184 break;
185 if (overflowed)
186 continue;
187 if (value > max_div_16) {
188 overflowed = TRUE;
189 continue;
190 }
191 value = (value << 4) | ((hexdigit - PL_hexdigit) & 0xF);
192 }
193 XPUSHs(sv_2mortal(newSVuv(overflowed ? UV_MAX : value)));
194 }
195
196
197SV*
198_isIllegal (sv)
199 SV* sv
200 PREINIT:
201 UV uv;
202 CODE:
203 if (!sv || !SvIOK(sv))
204 XSRETURN_YES;
205 uv = SvUVX(sv);
206 RETVAL = boolSV(
207 0x10FFFF < uv /* out of range */
208 );
209OUTPUT:
210 RETVAL
211
212
213SV*
214_isNonchar (sv)
215 SV* sv
216 PREINIT:
217 UV uv;
218 CODE:
219 /* should be called only if ! _isIllegal(sv). */
220 uv = SvUVX(sv);
221 RETVAL = boolSV(
222 ((uv & 0xFFFE) == 0xFFFE) /* ??FFF[EF] (cf. utf8.c) */
223 || (0xD800 <= uv && uv <= 0xDFFF) /* unpaired surrogates */
224 || (0xFDD0 <= uv && uv <= 0xFDEF) /* other non-characters */
225 );
226OUTPUT:
227 RETVAL
228
229
230void
231_decompHangul (code)
232 UV code
233 PREINIT:
234 UV sindex, lindex, vindex, tindex;
235 PPCODE:
236 /* code *must* be in Hangul syllable.
237 * Check it before you enter here. */
238 sindex = code - Hangul_SBase;
239 lindex = sindex / Hangul_NCount;
240 vindex = (sindex % Hangul_NCount) / Hangul_TCount;
241 tindex = sindex % Hangul_TCount;
242
243 XPUSHs(sv_2mortal(newSVuv(lindex + Hangul_LBase)));
244 XPUSHs(sv_2mortal(newSVuv(vindex + Hangul_VBase)));
245 if (tindex)
246 XPUSHs(sv_2mortal(newSVuv(tindex + Hangul_TBase)));
247
248
249SV*
250getHST (code, uca_vers = 0)
251 UV code;
252 IV uca_vers;
253 PREINIT:
0cb4637e 254 const char * hangtype;
f58b9ef1
CBW
255 STRLEN typelen;
256 CODE:
257 if (codeRange(Hangul_SIni, Hangul_SFin)) {
258 if ((code - Hangul_SBase) % Hangul_TCount) {
259 hangtype = "LVT"; typelen = 3;
260 } else {
261 hangtype = "LV"; typelen = 2;
262 }
263 } else if (uca_vers < 20) {
264 if (codeRange(Hangul_LIni, Hangul_LFin) || code == Hangul_LFill) {
265 hangtype = "L"; typelen = 1;
266 } else if (codeRange(Hangul_VIni, Hangul_VFin)) {
267 hangtype = "V"; typelen = 1;
268 } else if (codeRange(Hangul_TIni, Hangul_TFin)) {
269 hangtype = "T"; typelen = 1;
270 } else {
271 hangtype = ""; typelen = 0;
272 }
273 } else {
274 if (codeRange(Hangul_LIni, Hangul_LEnd) ||
275 codeRange(HangulL2Ini, HangulL2Fin)) {
276 hangtype = "L"; typelen = 1;
277 } else if (codeRange(Hangul_VIni, Hangul_VEnd) ||
278 codeRange(HangulV2Ini, HangulV2Fin)) {
279 hangtype = "V"; typelen = 1;
280 } else if (codeRange(Hangul_TIni, Hangul_TEnd) ||
281 codeRange(HangulT2Ini, HangulT2Fin)) {
282 hangtype = "T"; typelen = 1;
283 } else {
284 hangtype = ""; typelen = 0;
285 }
286 }
287
288 RETVAL = newSVpvn(hangtype, typelen);
289OUTPUT:
290 RETVAL
291
292
293void
294_derivCE_9 (code)
295 UV code
296 ALIAS:
297 _derivCE_14 = 1
298 _derivCE_18 = 2
299 _derivCE_20 = 3
300 _derivCE_22 = 4
301 PREINIT:
302 UV base, aaaa, bbbb;
303 U8 a[VCE_Length + 1] = "\x00\xFF\xFF\x00\x20\x00\x02\xFF\xFF";
304 U8 b[VCE_Length + 1] = "\x00\xFF\xFF\x00\x00\x00\x00\xFF\xFF";
305 bool basic_unified = 0;
306 PPCODE:
307 if (CJK_UidIni <= code) {
46267efc
CBW
308 if (codeRange(CJK_CompIni, CJK_CompFin))
309 basic_unified = (bool)UnifiedCompat[code - CJK_CompIni];
f58b9ef1
CBW
310 else
311 basic_unified = (ix >= 3 ? (code <= CJK_UidF52) :
312 ix == 2 ? (code <= CJK_UidF51) :
313 ix == 1 ? (code <= CJK_UidF41) :
314 (code <= CJK_UidFin));
315 }
316 base = (basic_unified)
317 ? 0xFB40 : /* CJK */
318 ((codeRange(CJK_ExtAIni, CJK_ExtAFin))
319 ||
320 (codeRange(CJK_ExtBIni, CJK_ExtBFin))
321 ||
322 (ix >= 3 && codeRange(CJK_ExtCIni, CJK_ExtCFin))
323 ||
324 (ix >= 4 && codeRange(CJK_ExtDIni, CJK_ExtDFin)))
325 ? 0xFB80 /* CJK ext. */
326 : 0xFBC0; /* others */
327 aaaa = base + (code >> 15);
328 bbbb = (code & 0x7FFF) | 0x8000;
329 a[1] = (U8)(aaaa >> 8);
330 a[2] = (U8)(aaaa & 0xFF);
331 b[1] = (U8)(bbbb >> 8);
332 b[2] = (U8)(bbbb & 0xFF);
333 a[7] = b[7] = (U8)(code >> 8);
334 a[8] = b[8] = (U8)(code & 0xFF);
335 XPUSHs(sv_2mortal(newSVpvn((char *) a, VCE_Length)));
336 XPUSHs(sv_2mortal(newSVpvn((char *) b, VCE_Length)));
337
338
339void
340_derivCE_8 (code)
341 UV code
342 PREINIT:
343 UV aaaa, bbbb;
344 U8 a[VCE_Length + 1] = "\x00\xFF\xFF\x00\x02\x00\x01\xFF\xFF";
345 U8 b[VCE_Length + 1] = "\x00\xFF\xFF\x00\x00\x00\x00\xFF\xFF";
346 PPCODE:
347 aaaa = 0xFF80 + (code >> 15);
348 bbbb = (code & 0x7FFF) | 0x8000;
349 a[1] = (U8)(aaaa >> 8);
350 a[2] = (U8)(aaaa & 0xFF);
351 b[1] = (U8)(bbbb >> 8);
352 b[2] = (U8)(bbbb & 0xFF);
353 a[7] = b[7] = (U8)(code >> 8);
354 a[8] = b[8] = (U8)(code & 0xFF);
355 XPUSHs(sv_2mortal(newSVpvn((char *) a, VCE_Length)));
356 XPUSHs(sv_2mortal(newSVpvn((char *) b, VCE_Length)));
357
358
359void
360_uideoCE_8 (code)
361 UV code
362 PREINIT:
363 U8 uice[VCE_Length + 1] = "\x00\xFF\xFF\x00\x20\x00\x02\xFF\xFF";
364 PPCODE:
365 uice[1] = uice[7] = (U8)(code >> 8);
366 uice[2] = uice[8] = (U8)(code & 0xFF);
367 XPUSHs(sv_2mortal(newSVpvn((char *) uice, VCE_Length)));
368
369
370SV*
371_isUIdeo (code, uca_vers)
372 UV code;
373 IV uca_vers;
374 bool basic_unified = 0;
375 CODE:
376 /* uca_vers = 0 for _uideoCE_8() */
377 if (CJK_UidIni <= code) {
46267efc
CBW
378 if (codeRange(CJK_CompIni, CJK_CompFin))
379 basic_unified = (bool)UnifiedCompat[code - CJK_CompIni];
f58b9ef1
CBW
380 else
381 basic_unified = (uca_vers >= 20 ? (code <= CJK_UidF52) :
382 uca_vers >= 18 ? (code <= CJK_UidF51) :
383 uca_vers >= 14 ? (code <= CJK_UidF41) :
384 (code <= CJK_UidFin));
385 }
386 RETVAL = boolSV(
387 (basic_unified)
388 ||
389 (codeRange(CJK_ExtAIni, CJK_ExtAFin))
390 ||
391 (uca_vers >= 8 && codeRange(CJK_ExtBIni, CJK_ExtBFin))
392 ||
393 (uca_vers >= 20 && codeRange(CJK_ExtCIni, CJK_ExtCFin))
394 ||
395 (uca_vers >= 22 && codeRange(CJK_ExtDIni, CJK_ExtDFin))
396 );
397OUTPUT:
398 RETVAL
399
400
401SV*
402mk_SortKey (self, buf)
403 SV* self;
404 SV* buf;
405 PREINIT:
406 SV *dst, **svp;
407 STRLEN dlen, vlen;
408 U8 *d, *p, *e, *v, *s[MaxLevel], *eachlevel[MaxLevel];
409 AV *bufAV;
410 HV *selfHV;
411 UV back_flag;
412 I32 i, buf_len;
413 IV lv, level, uca_vers;
414 bool upper_lower, kata_hira, v2i, last_is_var;
415 CODE:
416 if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV)
417 selfHV = (HV*)SvRV(self);
418 else
419 croak("$self is not a HASHREF.");
420
f58b9ef1
CBW
421 if (SvROK(buf) && SvTYPE(SvRV(buf)) == SVt_PVAV)
422 bufAV = (AV*)SvRV(buf);
423 else
424 croak("XSUB, not an ARRAYREF.");
425
426 buf_len = av_len(bufAV);
427
428 if (buf_len < 0) { /* empty: -1 */
429 dlen = 2 * (MaxLevel - 1);
430 dst = newSV(dlen);
431 (void)SvPOK_only(dst);
432 d = (U8*)SvPVX(dst);
433 while (dlen--)
434 *d++ = '\0';
19265284
CBW
435 } else {
436 svp = hv_fetch(selfHV, "level", 5, FALSE);
437 level = svp ? SvIV(*svp) : MaxLevel;
438
f58b9ef1
CBW
439 for (lv = 0; lv < level; lv++) {
440 New(0, eachlevel[lv], 2 * (1 + buf_len) + 1, U8);
441 s[lv] = eachlevel[lv];
442 }
443
444 svp = hv_fetch(selfHV, "upper_before_lower", 18, FALSE);
445 upper_lower = svp ? SvTRUE(*svp) : FALSE;
446 svp = hv_fetch(selfHV, "katakana_before_hiragana", 24, FALSE);
447 kata_hira = svp ? SvTRUE(*svp) : FALSE;
448 svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE);
449 uca_vers = SvIV(*svp);
450 svp = hv_fetch(selfHV, "variable", 8, FALSE);
451 v2i = uca_vers >= 9 && svp /* (vers >= 9) and not (non-ignorable) */
452 ? !(SvCUR(*svp) == 13 && memEQ(SvPVX(*svp), "non-ignorable", 13))
453 : FALSE;
454
455 last_is_var = FALSE;
456 for (i = 0; i <= buf_len; i++) {
457 svp = av_fetch(bufAV, i, FALSE);
458
459 if (svp && SvPOK(*svp))
460 v = (U8*)SvPV(*svp, vlen);
461 else
462 croak("not a vwt.");
463
464 if (vlen < VCE_Length) /* ignore short VCE (unexpected) */
465 continue;
466
467 /* "Ignorable (L1, L2) after Variable" since track. v. 9 */
468 if (v2i) {
469 if (*v)
470 last_is_var = TRUE;
471 else if (v[1] || v[2]) /* non zero primary weight */
472 last_is_var = FALSE;
473 else if (last_is_var) /* zero primary weight; skipped */
474 continue;
475 }
476
477 if (v[5] == 0) { /* tert wt < 256 */
478 if (upper_lower) {
479 if (0x8 <= v[6] && v[6] <= 0xC) /* lower */
480 v[6] -= 6;
481 else if (0x2 <= v[6] && v[6] <= 0x6) /* upper */
482 v[6] += 6;
483 else if (v[6] == 0x1C) /* square upper */
484 v[6]++;
485 else if (v[6] == 0x1D) /* square lower */
486 v[6]--;
487 }
488 if (kata_hira) {
489 if (0x0F <= v[6] && v[6] <= 0x13) /* katakana */
490 v[6] -= 2;
491 else if (0xD <= v[6] && v[6] <= 0xE) /* hiragana */
492 v[6] += 5;
493 }
494 }
495
496 for (lv = 0; lv < level; lv++) {
497 if (v[2 * lv + 1] || v[2 * lv + 2]) {
498 *s[lv]++ = v[2 * lv + 1];
499 *s[lv]++ = v[2 * lv + 2];
500 }
501 }
502 }
503
504 dlen = 2 * (MaxLevel - 1);
505 for (lv = 0; lv < level; lv++)
506 dlen += s[lv] - eachlevel[lv];
507
508 dst = newSV(dlen);
509 (void)SvPOK_only(dst);
510 d = (U8*)SvPVX(dst);
511
512 svp = hv_fetch(selfHV, "backwardsFlag", 13, FALSE);
513 back_flag = svp ? SvUV(*svp) : (UV)0;
514
515 for (lv = 0; lv < level; lv++) {
516 if (back_flag & (1 << (lv + 1))) {
517 p = s[lv];
518 e = eachlevel[lv];
519 for ( ; e < p; p -= 2) {
520 *d++ = p[-2];
521 *d++ = p[-1];
522 }
523 }
524 else {
525 p = eachlevel[lv];
526 e = s[lv];
527 while (p < e)
528 *d++ = *p++;
529 }
530 if (lv + 1 < MaxLevel) { /* lv + 1 == real level */
531 *d++ = '\0';
532 *d++ = '\0';
533 }
534 }
535
536 for (lv = level; lv < MaxLevel; lv++) {
537 if (lv + 1 < MaxLevel) { /* lv + 1 == real level */
538 *d++ = '\0';
539 *d++ = '\0';
540 }
541 }
542
543 for (lv = 0; lv < level; lv++) {
544 Safefree(eachlevel[lv]);
545 }
546 }
547 *d = '\0';
548 SvCUR_set(dst, d - (U8*)SvPVX(dst));
549 RETVAL = dst;
550OUTPUT:
551 RETVAL
552
553
554SV*
19265284
CBW
555varCE (self, vce)
556 SV* self;
557 SV* vce;
f58b9ef1 558 PREINIT:
19265284
CBW
559 SV *dst, *vbl, **svp;
560 HV *selfHV;
f58b9ef1
CBW
561 U8 *a, *v, *d;
562 STRLEN alen, vlen;
19265284
CBW
563 bool ig_l2;
564 UV totwt;
f58b9ef1 565 CODE:
19265284
CBW
566 if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV)
567 selfHV = (HV*)SvRV(self);
568 else
569 croak("$self is not a HASHREF.");
570
571 svp = hv_fetch(selfHV, "ignore_level2", 13, FALSE);
572 ig_l2 = svp ? SvTRUE(*svp) : FALSE;
573
574 svp = hv_fetch(selfHV, "variable", 8, FALSE);
575 vbl = svp ? *svp : &PL_sv_no;
f58b9ef1
CBW
576 a = (U8*)SvPV(vbl, alen);
577 v = (U8*)SvPV(vce, vlen);
578
579 dst = newSV(vlen);
580 d = (U8*)SvPVX(dst);
581 (void)SvPOK_only(dst);
582 Copy(v, d, vlen, U8);
583 SvCUR_set(dst, vlen);
584 d[vlen] = '\0';
585
19265284
CBW
586 /* primary weight == 0 && secondary weight != 0 */
587 if (ig_l2 && !d[1] && !d[2] && (d[3] || d[4])) {
588 d[3] = d[4] = d[5] = d[6] = '\0';
589 }
590
f58b9ef1
CBW
591 /* variable: checked only the first char and the length,
592 trusting checkCollator() and %VariableOK in Perl ... */
593
594 if (vlen < VCE_Length /* ignore short VCE (unexpected) */
595 ||
19265284 596 *a == 'n') /* non-ignorable */
f58b9ef1
CBW
597 1;
598 else if (*v) {
599 if (*a == 's') { /* shifted or shift-trimmed */
600 d[7] = d[1]; /* wt level 1 to 4 */
601 d[8] = d[2];
19265284
CBW
602 } /* else blanked */
603
f58b9ef1
CBW
604 d[1] = d[2] = d[3] = d[4] = d[5] = d[6] = '\0';
605 }
606 else if (*a == 'b') /* blanked */
607 1;
608 else if (*a == 's') { /* shifted or shift-trimmed */
19265284
CBW
609 totwt = d[1] + d[2] + d[3] + d[4] + d[5] + d[6];
610 if (alen == 7 && totwt != 0) { /* shifted */
f58b9ef1
CBW
611 d[7] = (U8)(Shift4Wt >> 8);
612 d[8] = (U8)(Shift4Wt & 0xFF);
613 }
19265284
CBW
614 else { /* shift-trimmed */
615 d[7] = d[8] = '\0';
f58b9ef1
CBW
616 }
617 }
618 else
619 croak("unknown variable value '%s'", a);
620 RETVAL = dst;
621OUTPUT:
622 RETVAL
623
624
625
626SV*
627visualizeSortKey (self, key)
628 SV * self
629 SV * key
630 PREINIT:
631 HV *selfHV;
632 SV **svp, *dst;
633 U8 *s, *e, *d;
634 STRLEN klen, dlen;
635 UV uv;
636 IV uca_vers;
0cb4637e 637 static const char *upperhex = "0123456789ABCDEF";
f58b9ef1
CBW
638 CODE:
639 if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV)
640 selfHV = (HV*)SvRV(self);
641 else
642 croak("$self is not a HASHREF.");
643
644 svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE);
645 if (!svp)
646 croak("Panic: no $self->{UCA_Version} in visualizeSortKey");
647 uca_vers = SvIV(*svp);
648
649 s = (U8*)SvPV(key, klen);
650
651 /* slightly *longer* than the need, but I'm afraid of miscounting;
652 exactly: (klen / 2) * 5 + MaxLevel * 2 - 1 (excluding '\0')
653 = (klen / 2) * 5 - 1 # FFFF (16bit) and ' ' between 16bit units
654 + (MaxLevel - 1) * 2 # ' ' and '|' for level boundaries
655 + 2 # '[' and ']'
656 */
657 dlen = (klen / 2) * 5 + MaxLevel * 2 + 2;
658 dst = newSV(dlen);
659 (void)SvPOK_only(dst);
660 d = (U8*)SvPVX(dst);
661
662 *d++ = '[';
663 for (e = s + klen; s < e; s += 2) {
664 uv = (U16)(*s << 8 | s[1]);
665 if (uv) {
666 if ((d[-1] != '[') && ((9 <= uca_vers) || (d[-1] != '|')))
667 *d++ = ' ';
668 *d++ = upperhex[ (s[0] >> 4) & 0xF ];
669 *d++ = upperhex[ s[0] & 0xF ];
670 *d++ = upperhex[ (s[1] >> 4) & 0xF ];
671 *d++ = upperhex[ s[1] & 0xF ];
672 }
673 else {
674 if ((9 <= uca_vers) && (d[-1] != '['))
675 *d++ = ' ';
676 *d++ = '|';
677 }
678 }
679 *d++ = ']';
680 *d = '\0';
681 SvCUR_set(dst, d - (U8*)SvPVX(dst));
682 RETVAL = dst;
683OUTPUT:
684 RETVAL
685
686
687
688void
689unpack_U (src)
690 SV* src
691 PREINIT:
692 STRLEN srclen, retlen;
693 U8 *s, *p, *e;
694 UV uv;
695 PPCODE:
696 s = (U8*)SvPV(src,srclen);
697 if (!SvUTF8(src)) {
698 SV* tmpsv = sv_mortalcopy(src);
699 if (!SvPOK(tmpsv))
700 (void)sv_pvn_force(tmpsv,&srclen);
701 sv_utf8_upgrade(tmpsv);
702 s = (U8*)SvPV(tmpsv,srclen);
703 }
704 e = s + srclen;
705
706 for (p = s; p < e; p += retlen) {
707 uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
708 if (!retlen)
709 croak(ErrRetlenIsZero);
710 XPUSHs(sv_2mortal(newSVuv(uv)));
711 }
712