This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Sync with CPAN version 5.00 of Term::ANSIColor
[perl5.git] / cpan / Unicode-Collate / Collate.xs
CommitLineData
cd7f10f7
CBW
1
2#define PERL_NO_GET_CONTEXT /* we want efficiency */
3
4/* I guese no private function needs pTHX_ and aTHX_ */
5
f58b9ef1
CBW
6#include "EXTERN.h"
7#include "perl.h"
8#include "XSUB.h"
9
10/* This file is prepared by mkheader */
11#include "ucatbl.h"
12
f58b9ef1
CBW
13/* At present, char > 0x10ffff are unaffected without complaint, right? */
14#define VALID_UTF_MAX (0x10ffff)
15#define OVER_UTF_MAX(uv) (VALID_UTF_MAX < (uv))
16
9d8690d8 17#define MAX_DIV_16 (UV_MAX / 16)
f58b9ef1
CBW
18
19/* Supported Levels */
20#define MinLevel (1)
21#define MaxLevel (4)
22
23/* Shifted weight at 4th level */
24#define Shift4Wt (0xFFFF)
25
26#define VCE_Length (9)
27
28#define Hangul_SBase (0xAC00)
29#define Hangul_SIni (0xAC00)
30#define Hangul_SFin (0xD7A3)
31#define Hangul_NCount (588)
32#define Hangul_TCount (28)
33#define Hangul_LBase (0x1100)
34#define Hangul_LIni (0x1100)
35#define Hangul_LFin (0x1159)
36#define Hangul_LFill (0x115F)
37#define Hangul_LEnd (0x115F) /* Unicode 5.2 */
38#define Hangul_VBase (0x1161)
39#define Hangul_VIni (0x1160) /* from Vowel Filler */
40#define Hangul_VFin (0x11A2)
41#define Hangul_VEnd (0x11A7) /* Unicode 5.2 */
42#define Hangul_TBase (0x11A7) /* from "no-final" codepoint */
43#define Hangul_TIni (0x11A8)
44#define Hangul_TFin (0x11F9)
45#define Hangul_TEnd (0x11FF) /* Unicode 5.2 */
46#define HangulL2Ini (0xA960) /* Unicode 5.2 */
47#define HangulL2Fin (0xA97C) /* Unicode 5.2 */
48#define HangulV2Ini (0xD7B0) /* Unicode 5.2 */
49#define HangulV2Fin (0xD7C6) /* Unicode 5.2 */
50#define HangulT2Ini (0xD7CB) /* Unicode 5.2 */
51#define HangulT2Fin (0xD7FB) /* Unicode 5.2 */
52
53#define CJK_UidIni (0x4E00)
54#define CJK_UidFin (0x9FA5)
eaecde9a
CBW
55#define CJK_UidF41 (0x9FBB) /* Unicode 4.1 */
56#define CJK_UidF51 (0x9FC3) /* Unicode 5.1 */
57#define CJK_UidF52 (0x9FCB) /* Unicode 5.2 */
58#define CJK_UidF61 (0x9FCC) /* Unicode 6.1 */
59#define CJK_UidF80 (0x9FD5) /* Unicode 8.0 */
60#define CJK_UidF100 (0x9FEA) /* Unicode 10.0 */
61
f58b9ef1
CBW
62#define CJK_ExtAIni (0x3400) /* Unicode 3.0 */
63#define CJK_ExtAFin (0x4DB5) /* Unicode 3.0 */
64#define CJK_ExtBIni (0x20000) /* Unicode 3.1 */
65#define CJK_ExtBFin (0x2A6D6) /* Unicode 3.1 */
66#define CJK_ExtCIni (0x2A700) /* Unicode 5.2 */
67#define CJK_ExtCFin (0x2B734) /* Unicode 5.2 */
68#define CJK_ExtDIni (0x2B740) /* Unicode 6.0 */
69#define CJK_ExtDFin (0x2B81D) /* Unicode 6.0 */
fcd4e2f8
SH
70#define CJK_ExtEIni (0x2B820) /* Unicode 8.0 */
71#define CJK_ExtEFin (0x2CEA1) /* Unicode 8.0 */
eaecde9a
CBW
72#define CJK_ExtFIni (0x2CEB0) /* Unicode 10.0 */
73#define CJK_ExtFFin (0x2EBE0) /* Unicode 10.0 */
f58b9ef1 74
46267efc
CBW
75#define CJK_CompIni (0xFA0E)
76#define CJK_CompFin (0xFA29)
9d8690d8 77static const STDCHAR UnifiedCompat[] = {
f58b9ef1
CBW
78 1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,1
79}; /* E F 0 1 2 3 4 5 6 7 8 9 A B C D E F 0 1 2 3 4 5 6 7 8 9 */
80
6608d2d5
CBW
81#define TangIdeoIni (0x17000) /* Unicode 9.0 */
82#define TangIdeoFin (0x187EC) /* Unicode 9.0 */
83#define TangCompIni (0x18800) /* Unicode 9.0 */
84#define TangCompFin (0x18AF2) /* Unicode 9.0 */
eaecde9a
CBW
85#define NushuIni (0x1B170) /* Unicode 10.0 */
86#define NushuFin (0x1B2FB) /* Unicode 10.0 */
6608d2d5 87
f58b9ef1
CBW
88#define codeRange(bcode, ecode) ((bcode) <= code && code <= (ecode))
89
90MODULE = Unicode::Collate PACKAGE = Unicode::Collate
91
92PROTOTYPES: DISABLE
93
94void
95_fetch_rest ()
96 PREINIT:
97 char ** rest;
98 PPCODE:
9d8690d8 99 for (rest = (char **)UCA_rest; *rest; ++rest) {
f58b9ef1
CBW
100 XPUSHs(sv_2mortal(newSVpv((char *) *rest, 0)));
101 }
102
103
104void
105_fetch_simple (uv)
106 UV uv
107 PREINIT:
108 U8 ***plane, **row;
109 U8* result = NULL;
110 PPCODE:
111 if (!OVER_UTF_MAX(uv)){
112 plane = (U8***)UCA_simple[uv >> 16];
113 if (plane) {
114 row = plane[(uv >> 8) & 0xff];
115 result = row ? row[uv & 0xff] : NULL;
116 }
117 }
118 if (result) {
119 int i;
120 int num = (int)*result;
121 ++result;
9d8690d8 122 EXTEND(SP, num);
f58b9ef1 123 for (i = 0; i < num; ++i) {
9d8690d8 124 PUSHs(sv_2mortal(newSVpvn((char *) result, VCE_Length)));
f58b9ef1
CBW
125 result += VCE_Length;
126 }
127 } else {
9d8690d8 128 PUSHs(sv_2mortal(newSViv(0)));
f58b9ef1
CBW
129 }
130
131SV*
132_ignorable_simple (uv)
133 UV uv
134 ALIAS:
135 _exists_simple = 1
136 PREINIT:
137 U8 ***plane, **row;
138 int num = -1;
139 U8* result = NULL;
140 CODE:
141 if (!OVER_UTF_MAX(uv)){
142 plane = (U8***)UCA_simple[uv >> 16];
143 if (plane) {
144 row = plane[(uv >> 8) & 0xff];
145 result = row ? row[uv & 0xff] : NULL;
146 }
147 if (result)
148 num = (int)*result; /* assuming 0 <= num < 128 */
149 }
150
151 if (ix)
152 RETVAL = boolSV(num >0);
153 else
154 RETVAL = boolSV(num==0);
155 OUTPUT:
156 RETVAL
157
158
159void
160_getHexArray (src)
161 SV* src
162 PREINIT:
163 char *s, *e;
164 STRLEN byte;
165 UV value;
166 bool overflowed = FALSE;
167 const char *hexdigit;
168 PPCODE:
169 s = SvPV(src,byte);
170 for (e = s + byte; s < e;) {
171 hexdigit = strchr((char *) PL_hexdigit, *s++);
9d8690d8 172 if (! hexdigit)
f58b9ef1
CBW
173 continue;
174 value = (hexdigit - PL_hexdigit) & 0xF;
175 while (*s) {
176 hexdigit = strchr((char *) PL_hexdigit, *s++);
177 if (! hexdigit)
178 break;
179 if (overflowed)
180 continue;
9d8690d8 181 if (value > MAX_DIV_16) {
f58b9ef1
CBW
182 overflowed = TRUE;
183 continue;
184 }
185 value = (value << 4) | ((hexdigit - PL_hexdigit) & 0xF);
186 }
187 XPUSHs(sv_2mortal(newSVuv(overflowed ? UV_MAX : value)));
188 }
189
190
191SV*
192_isIllegal (sv)
193 SV* sv
194 PREINIT:
195 UV uv;
196 CODE:
197 if (!sv || !SvIOK(sv))
198 XSRETURN_YES;
199 uv = SvUVX(sv);
200 RETVAL = boolSV(
60f577e0 201 0x10FFFF < uv /* out of range */
4779e03e 202 || ((uv & 0xFFFE) == 0xFFFE) /* ??FFF[EF] */
f58b9ef1
CBW
203 || (0xD800 <= uv && uv <= 0xDFFF) /* unpaired surrogates */
204 || (0xFDD0 <= uv && uv <= 0xFDEF) /* other non-characters */
205 );
206OUTPUT:
207 RETVAL
208
209
210void
211_decompHangul (code)
212 UV code
213 PREINIT:
214 UV sindex, lindex, vindex, tindex;
215 PPCODE:
216 /* code *must* be in Hangul syllable.
217 * Check it before you enter here. */
218 sindex = code - Hangul_SBase;
219 lindex = sindex / Hangul_NCount;
220 vindex = (sindex % Hangul_NCount) / Hangul_TCount;
221 tindex = sindex % Hangul_TCount;
222
9d8690d8
CBW
223 EXTEND(SP, tindex ? 3 : 2);
224 PUSHs(sv_2mortal(newSVuv(lindex + Hangul_LBase)));
225 PUSHs(sv_2mortal(newSVuv(vindex + Hangul_VBase)));
f58b9ef1 226 if (tindex)
9d8690d8 227 PUSHs(sv_2mortal(newSVuv(tindex + Hangul_TBase)));
f58b9ef1
CBW
228
229
230SV*
231getHST (code, uca_vers = 0)
232 UV code;
233 IV uca_vers;
234 PREINIT:
0cb4637e 235 const char * hangtype;
f58b9ef1
CBW
236 STRLEN typelen;
237 CODE:
238 if (codeRange(Hangul_SIni, Hangul_SFin)) {
239 if ((code - Hangul_SBase) % Hangul_TCount) {
240 hangtype = "LVT"; typelen = 3;
241 } else {
242 hangtype = "LV"; typelen = 2;
243 }
244 } else if (uca_vers < 20) {
245 if (codeRange(Hangul_LIni, Hangul_LFin) || code == Hangul_LFill) {
246 hangtype = "L"; typelen = 1;
247 } else if (codeRange(Hangul_VIni, Hangul_VFin)) {
248 hangtype = "V"; typelen = 1;
249 } else if (codeRange(Hangul_TIni, Hangul_TFin)) {
250 hangtype = "T"; typelen = 1;
251 } else {
252 hangtype = ""; typelen = 0;
253 }
254 } else {
255 if (codeRange(Hangul_LIni, Hangul_LEnd) ||
256 codeRange(HangulL2Ini, HangulL2Fin)) {
257 hangtype = "L"; typelen = 1;
258 } else if (codeRange(Hangul_VIni, Hangul_VEnd) ||
259 codeRange(HangulV2Ini, HangulV2Fin)) {
260 hangtype = "V"; typelen = 1;
261 } else if (codeRange(Hangul_TIni, Hangul_TEnd) ||
262 codeRange(HangulT2Ini, HangulT2Fin)) {
263 hangtype = "T"; typelen = 1;
264 } else {
265 hangtype = ""; typelen = 0;
266 }
267 }
268
269 RETVAL = newSVpvn(hangtype, typelen);
270OUTPUT:
271 RETVAL
272
273
274void
275_derivCE_9 (code)
276 UV code
277 ALIAS:
278 _derivCE_14 = 1
279 _derivCE_18 = 2
280 _derivCE_20 = 3
281 _derivCE_22 = 4
cba8842c 282 _derivCE_24 = 5
fcd4e2f8 283 _derivCE_32 = 6
6608d2d5 284 _derivCE_34 = 7
eaecde9a 285 _derivCE_36 = 8
f58b9ef1
CBW
286 PREINIT:
287 UV base, aaaa, bbbb;
126be117
JH
288 U8 a[VCE_Length + 1] = "\x00\x00\x00\x00\x00\x00\x00\x00\x00";
289 U8 b[VCE_Length + 1] = "\x00\x00\x00\x00\x00\x00\x00\x00\x00";
eaecde9a 290 bool basic_unified = 0, tangut = 0, nushu = 0;
f58b9ef1 291 PPCODE:
6608d2d5 292 if (codeRange(CJK_UidIni, CJK_CompFin)) {
46267efc
CBW
293 if (codeRange(CJK_CompIni, CJK_CompFin))
294 basic_unified = (bool)UnifiedCompat[code - CJK_CompIni];
f58b9ef1 295 else
eaecde9a
CBW
296 basic_unified = (ix >= 8 ? (code <= CJK_UidF100) :
297 ix >= 6 ? (code <= CJK_UidF80) :
fcd4e2f8 298 ix == 5 ? (code <= CJK_UidF61) :
cba8842c 299 ix >= 3 ? (code <= CJK_UidF52) :
f58b9ef1
CBW
300 ix == 2 ? (code <= CJK_UidF51) :
301 ix == 1 ? (code <= CJK_UidF41) :
302 (code <= CJK_UidFin));
eaecde9a
CBW
303 } else {
304 if (ix >= 7)
305 tangut = (codeRange(TangIdeoIni, TangIdeoFin) ||
306 codeRange(TangCompIni, TangCompFin));
307 if (ix >= 8)
308 nushu = (codeRange(NushuIni, NushuFin));
f58b9ef1 309 }
6608d2d5
CBW
310 base = tangut
311 ? 0xFB00 :
eaecde9a
CBW
312 nushu
313 ? 0xFB01 :
6608d2d5 314 basic_unified
f58b9ef1
CBW
315 ? 0xFB40 : /* CJK */
316 ((codeRange(CJK_ExtAIni, CJK_ExtAFin))
317 ||
318 (codeRange(CJK_ExtBIni, CJK_ExtBFin))
319 ||
320 (ix >= 3 && codeRange(CJK_ExtCIni, CJK_ExtCFin))
321 ||
fcd4e2f8
SH
322 (ix >= 4 && codeRange(CJK_ExtDIni, CJK_ExtDFin))
323 ||
eaecde9a
CBW
324 (ix >= 6 && codeRange(CJK_ExtEIni, CJK_ExtEFin))
325 ||
326 (ix >= 8 && codeRange(CJK_ExtFIni, CJK_ExtFFin)))
f58b9ef1
CBW
327 ? 0xFB80 /* CJK ext. */
328 : 0xFBC0; /* others */
eaecde9a
CBW
329 aaaa = tangut || nushu ? base : base + (code >> 15);
330 bbbb = (tangut ? (code - TangIdeoIni) :
331 nushu ? (code - NushuIni) : (code & 0x7FFF)) | 0x8000;
f58b9ef1
CBW
332 a[1] = (U8)(aaaa >> 8);
333 a[2] = (U8)(aaaa & 0xFF);
334 b[1] = (U8)(bbbb >> 8);
335 b[2] = (U8)(bbbb & 0xFF);
126be117
JH
336 a[4] = (U8)(0x20); /* second octet of level 2 */
337 a[6] = (U8)(0x02); /* second octet of level 3 */
f58b9ef1
CBW
338 a[7] = b[7] = (U8)(code >> 8);
339 a[8] = b[8] = (U8)(code & 0xFF);
9d8690d8
CBW
340 EXTEND(SP, 2);
341 PUSHs(sv_2mortal(newSVpvn((char *) a, VCE_Length)));
342 PUSHs(sv_2mortal(newSVpvn((char *) b, VCE_Length)));
f58b9ef1
CBW
343
344
345void
346_derivCE_8 (code)
347 UV code
348 PREINIT:
349 UV aaaa, bbbb;
126be117
JH
350 U8 a[VCE_Length + 1] = "\x00\x00\x00\x00\x00\x00\x00\x00\x00";
351 U8 b[VCE_Length + 1] = "\x00\x00\x00\x00\x00\x00\x00\x00\x00";
f58b9ef1
CBW
352 PPCODE:
353 aaaa = 0xFF80 + (code >> 15);
354 bbbb = (code & 0x7FFF) | 0x8000;
355 a[1] = (U8)(aaaa >> 8);
356 a[2] = (U8)(aaaa & 0xFF);
357 b[1] = (U8)(bbbb >> 8);
358 b[2] = (U8)(bbbb & 0xFF);
126be117
JH
359 a[4] = (U8)(0x02); /* second octet of level 2 */
360 a[6] = (U8)(0x01); /* second octet of level 3 */
f58b9ef1
CBW
361 a[7] = b[7] = (U8)(code >> 8);
362 a[8] = b[8] = (U8)(code & 0xFF);
9d8690d8
CBW
363 EXTEND(SP, 2);
364 PUSHs(sv_2mortal(newSVpvn((char *) a, VCE_Length)));
365 PUSHs(sv_2mortal(newSVpvn((char *) b, VCE_Length)));
f58b9ef1
CBW
366
367
368void
369_uideoCE_8 (code)
370 UV code
371 PREINIT:
126be117 372 U8 uice[VCE_Length + 1] = "\x00\x00\x00\x00\x00\x00\x00\x00\x00";
f58b9ef1
CBW
373 PPCODE:
374 uice[1] = uice[7] = (U8)(code >> 8);
375 uice[2] = uice[8] = (U8)(code & 0xFF);
126be117
JH
376 uice[4] = (U8)(0x20); /* second octet of level 2 */
377 uice[6] = (U8)(0x02); /* second octet of level 3 */
9d8690d8 378 PUSHs(sv_2mortal(newSVpvn((char *) uice, VCE_Length)));
f58b9ef1
CBW
379
380
381SV*
382_isUIdeo (code, uca_vers)
383 UV code;
384 IV uca_vers;
385 bool basic_unified = 0;
386 CODE:
387 /* uca_vers = 0 for _uideoCE_8() */
388 if (CJK_UidIni <= code) {
46267efc
CBW
389 if (codeRange(CJK_CompIni, CJK_CompFin))
390 basic_unified = (bool)UnifiedCompat[code - CJK_CompIni];
f58b9ef1 391 else
eaecde9a
CBW
392 basic_unified = (uca_vers >= 36 ? (code <= CJK_UidF100) :
393 uca_vers >= 32 ? (code <= CJK_UidF80) :
fcd4e2f8 394 uca_vers >= 24 ? (code <= CJK_UidF61) :
cba8842c 395 uca_vers >= 20 ? (code <= CJK_UidF52) :
f58b9ef1
CBW
396 uca_vers >= 18 ? (code <= CJK_UidF51) :
397 uca_vers >= 14 ? (code <= CJK_UidF41) :
398 (code <= CJK_UidFin));
399 }
400 RETVAL = boolSV(
401 (basic_unified)
402 ||
403 (codeRange(CJK_ExtAIni, CJK_ExtAFin))
404 ||
405 (uca_vers >= 8 && codeRange(CJK_ExtBIni, CJK_ExtBFin))
406 ||
407 (uca_vers >= 20 && codeRange(CJK_ExtCIni, CJK_ExtCFin))
408 ||
409 (uca_vers >= 22 && codeRange(CJK_ExtDIni, CJK_ExtDFin))
fcd4e2f8
SH
410 ||
411 (uca_vers >= 32 && codeRange(CJK_ExtEIni, CJK_ExtEFin))
eaecde9a
CBW
412 ||
413 (uca_vers >= 36 && codeRange(CJK_ExtFIni, CJK_ExtFFin))
f58b9ef1
CBW
414 );
415OUTPUT:
416 RETVAL
417
418
419SV*
420mk_SortKey (self, buf)
421 SV* self;
422 SV* buf;
423 PREINIT:
424 SV *dst, **svp;
425 STRLEN dlen, vlen;
426 U8 *d, *p, *e, *v, *s[MaxLevel], *eachlevel[MaxLevel];
427 AV *bufAV;
428 HV *selfHV;
429 UV back_flag;
430 I32 i, buf_len;
431 IV lv, level, uca_vers;
432 bool upper_lower, kata_hira, v2i, last_is_var;
433 CODE:
434 if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV)
435 selfHV = (HV*)SvRV(self);
436 else
437 croak("$self is not a HASHREF.");
438
f58b9ef1
CBW
439 if (SvROK(buf) && SvTYPE(SvRV(buf)) == SVt_PVAV)
440 bufAV = (AV*)SvRV(buf);
441 else
442 croak("XSUB, not an ARRAYREF.");
443
444 buf_len = av_len(bufAV);
445
446 if (buf_len < 0) { /* empty: -1 */
447 dlen = 2 * (MaxLevel - 1);
448 dst = newSV(dlen);
449 (void)SvPOK_only(dst);
450 d = (U8*)SvPVX(dst);
451 while (dlen--)
452 *d++ = '\0';
19265284
CBW
453 } else {
454 svp = hv_fetch(selfHV, "level", 5, FALSE);
455 level = svp ? SvIV(*svp) : MaxLevel;
456
f58b9ef1
CBW
457 for (lv = 0; lv < level; lv++) {
458 New(0, eachlevel[lv], 2 * (1 + buf_len) + 1, U8);
459 s[lv] = eachlevel[lv];
460 }
461
462 svp = hv_fetch(selfHV, "upper_before_lower", 18, FALSE);
463 upper_lower = svp ? SvTRUE(*svp) : FALSE;
464 svp = hv_fetch(selfHV, "katakana_before_hiragana", 24, FALSE);
465 kata_hira = svp ? SvTRUE(*svp) : FALSE;
466 svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE);
467 uca_vers = SvIV(*svp);
468 svp = hv_fetch(selfHV, "variable", 8, FALSE);
469 v2i = uca_vers >= 9 && svp /* (vers >= 9) and not (non-ignorable) */
470 ? !(SvCUR(*svp) == 13 && memEQ(SvPVX(*svp), "non-ignorable", 13))
471 : FALSE;
472
473 last_is_var = FALSE;
474 for (i = 0; i <= buf_len; i++) {
475 svp = av_fetch(bufAV, i, FALSE);
476
477 if (svp && SvPOK(*svp))
478 v = (U8*)SvPV(*svp, vlen);
479 else
480 croak("not a vwt.");
481
482 if (vlen < VCE_Length) /* ignore short VCE (unexpected) */
483 continue;
484
485 /* "Ignorable (L1, L2) after Variable" since track. v. 9 */
486 if (v2i) {
487 if (*v)
488 last_is_var = TRUE;
489 else if (v[1] || v[2]) /* non zero primary weight */
490 last_is_var = FALSE;
491 else if (last_is_var) /* zero primary weight; skipped */
492 continue;
493 }
494
495 if (v[5] == 0) { /* tert wt < 256 */
496 if (upper_lower) {
497 if (0x8 <= v[6] && v[6] <= 0xC) /* lower */
498 v[6] -= 6;
499 else if (0x2 <= v[6] && v[6] <= 0x6) /* upper */
500 v[6] += 6;
501 else if (v[6] == 0x1C) /* square upper */
502 v[6]++;
503 else if (v[6] == 0x1D) /* square lower */
504 v[6]--;
505 }
506 if (kata_hira) {
507 if (0x0F <= v[6] && v[6] <= 0x13) /* katakana */
508 v[6] -= 2;
509 else if (0xD <= v[6] && v[6] <= 0xE) /* hiragana */
510 v[6] += 5;
511 }
512 }
513
514 for (lv = 0; lv < level; lv++) {
515 if (v[2 * lv + 1] || v[2 * lv + 2]) {
516 *s[lv]++ = v[2 * lv + 1];
517 *s[lv]++ = v[2 * lv + 2];
518 }
519 }
520 }
521
522 dlen = 2 * (MaxLevel - 1);
523 for (lv = 0; lv < level; lv++)
524 dlen += s[lv] - eachlevel[lv];
525
526 dst = newSV(dlen);
527 (void)SvPOK_only(dst);
528 d = (U8*)SvPVX(dst);
529
530 svp = hv_fetch(selfHV, "backwardsFlag", 13, FALSE);
531 back_flag = svp ? SvUV(*svp) : (UV)0;
532
533 for (lv = 0; lv < level; lv++) {
534 if (back_flag & (1 << (lv + 1))) {
535 p = s[lv];
536 e = eachlevel[lv];
537 for ( ; e < p; p -= 2) {
538 *d++ = p[-2];
539 *d++ = p[-1];
540 }
541 }
542 else {
543 p = eachlevel[lv];
544 e = s[lv];
545 while (p < e)
546 *d++ = *p++;
547 }
548 if (lv + 1 < MaxLevel) { /* lv + 1 == real level */
549 *d++ = '\0';
550 *d++ = '\0';
551 }
552 }
553
554 for (lv = level; lv < MaxLevel; lv++) {
555 if (lv + 1 < MaxLevel) { /* lv + 1 == real level */
556 *d++ = '\0';
557 *d++ = '\0';
558 }
559 }
560
561 for (lv = 0; lv < level; lv++) {
562 Safefree(eachlevel[lv]);
563 }
564 }
565 *d = '\0';
566 SvCUR_set(dst, d - (U8*)SvPVX(dst));
567 RETVAL = dst;
568OUTPUT:
569 RETVAL
570
571
572SV*
19265284
CBW
573varCE (self, vce)
574 SV* self;
575 SV* vce;
f58b9ef1 576 PREINIT:
19265284
CBW
577 SV *dst, *vbl, **svp;
578 HV *selfHV;
f58b9ef1
CBW
579 U8 *a, *v, *d;
580 STRLEN alen, vlen;
19265284 581 bool ig_l2;
eaecde9a 582 IV uca_vers;
19265284 583 UV totwt;
f58b9ef1 584 CODE:
19265284
CBW
585 if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV)
586 selfHV = (HV*)SvRV(self);
587 else
588 croak("$self is not a HASHREF.");
589
590 svp = hv_fetch(selfHV, "ignore_level2", 13, FALSE);
591 ig_l2 = svp ? SvTRUE(*svp) : FALSE;
592
593 svp = hv_fetch(selfHV, "variable", 8, FALSE);
594 vbl = svp ? *svp : &PL_sv_no;
f58b9ef1
CBW
595 a = (U8*)SvPV(vbl, alen);
596 v = (U8*)SvPV(vce, vlen);
597
598 dst = newSV(vlen);
599 d = (U8*)SvPVX(dst);
600 (void)SvPOK_only(dst);
601 Copy(v, d, vlen, U8);
602 SvCUR_set(dst, vlen);
603 d[vlen] = '\0';
604
19265284
CBW
605 /* primary weight == 0 && secondary weight != 0 */
606 if (ig_l2 && !d[1] && !d[2] && (d[3] || d[4])) {
607 d[3] = d[4] = d[5] = d[6] = '\0';
608 }
609
f58b9ef1
CBW
610 /* variable: checked only the first char and the length,
611 trusting checkCollator() and %VariableOK in Perl ... */
612
f8187d97
SH
613 if (vlen >= VCE_Length && *a != 'n') {
614 if (*v) {
615 if (*a == 's') { /* shifted or shift-trimmed */
750da838
CBW
616 d[7] = d[1]; /* wt level 1 to 4 */
617 d[8] = d[2];
f8187d97
SH
618 } /* else blanked */
619 d[1] = d[2] = d[3] = d[4] = d[5] = d[6] = '\0';
620 } else if (*a == 's') { /* shifted or shift-trimmed */
621 totwt = d[1] + d[2] + d[3] + d[4] + d[5] + d[6];
622 if (alen == 7 && totwt != 0) { /* shifted */
623 if (d[1] == 0 && d[2] == 1) { /* XXX: CollationAuxiliary-6.2.0 */
624 d[7] = d[1]; /* wt level 1 to 4 */
625 d[8] = d[2];
626 } else {
eaecde9a
CBW
627 svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE);
628 if (!svp)
629 croak("Panic: no $self->{UCA_Version} in varCE");
630 uca_vers = SvIV(*svp);
631
632 /* completely ignorable or the second derived CE */
633 if (uca_vers >= 36 && d[3] + d[4] + d[5] + d[6] == 0) {
634 d[7] = d[8] = '\0';
635 } else {
636 d[7] = (U8)(Shift4Wt >> 8);
637 d[8] = (U8)(Shift4Wt & 0xFF);
638 }
f8187d97
SH
639 }
640 } else { /* shift-trimmed or completely ignorable */
641 d[7] = d[8] = '\0';
750da838 642 }
f8187d97
SH
643 } /* else blanked */
644 } /* else non-ignorable */
f58b9ef1
CBW
645 RETVAL = dst;
646OUTPUT:
647 RETVAL
648
649
650
651SV*
652visualizeSortKey (self, key)
653 SV * self
654 SV * key
655 PREINIT:
656 HV *selfHV;
657 SV **svp, *dst;
658 U8 *s, *e, *d;
659 STRLEN klen, dlen;
660 UV uv;
750da838 661 IV uca_vers, sep = 0;
9d8690d8 662 const char *upperhex = "0123456789ABCDEF";
f58b9ef1
CBW
663 CODE:
664 if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV)
665 selfHV = (HV*)SvRV(self);
666 else
667 croak("$self is not a HASHREF.");
668
669 svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE);
670 if (!svp)
671 croak("Panic: no $self->{UCA_Version} in visualizeSortKey");
672 uca_vers = SvIV(*svp);
673
674 s = (U8*)SvPV(key, klen);
675
676 /* slightly *longer* than the need, but I'm afraid of miscounting;
750da838
CBW
677 = (klen / 2) * 5 - 1
678 # FFFF and ' ' for each 16bit units but ' ' is less by 1;
679 # ' ' and '|' for level boundaries including the identical level
680 + 2 # '[' and ']'
681 + 1 # '\0'
682 (a) if klen is odd (not expected), maybe more 5 bytes.
683 (b) there is not always the identical level.
f58b9ef1
CBW
684 */
685 dlen = (klen / 2) * 5 + MaxLevel * 2 + 2;
686 dst = newSV(dlen);
687 (void)SvPOK_only(dst);
688 d = (U8*)SvPVX(dst);
689
690 *d++ = '[';
691 for (e = s + klen; s < e; s += 2) {
692 uv = (U16)(*s << 8 | s[1]);
750da838 693 if (uv || sep >= MaxLevel) {
f58b9ef1
CBW
694 if ((d[-1] != '[') && ((9 <= uca_vers) || (d[-1] != '|')))
695 *d++ = ' ';
696 *d++ = upperhex[ (s[0] >> 4) & 0xF ];
697 *d++ = upperhex[ s[0] & 0xF ];
698 *d++ = upperhex[ (s[1] >> 4) & 0xF ];
699 *d++ = upperhex[ s[1] & 0xF ];
750da838 700 } else {
f58b9ef1
CBW
701 if ((9 <= uca_vers) && (d[-1] != '['))
702 *d++ = ' ';
703 *d++ = '|';
750da838 704 ++sep;
f58b9ef1
CBW
705 }
706 }
707 *d++ = ']';
708 *d = '\0';
709 SvCUR_set(dst, d - (U8*)SvPVX(dst));
710 RETVAL = dst;
711OUTPUT:
712 RETVAL