This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
perllocale: Mention /l
[perl5.git] / cpan / Text-Soundex / Soundex.xs
CommitLineData
11f885b5
SP
1/* -*- c -*- */
2
3/* (c) Copyright 1998-2003 by Mark Mielke
4 *
5 * Freedom to use these sources for whatever you want, as long as credit
6 * is given where credit is due, is hereby granted. You may make modifications
7 * where you see fit but leave this copyright somewhere visible. As well try
8 * to initial any changes you make so that if i like the changes i can
9 * incorporate them into any later versions of mine.
10 *
11 * - Mark Mielke <mark@mielke.cc>
12 */
13
14#include "EXTERN.h"
15#include "perl.h"
16#include "XSUB.h"
17
18#define SOUNDEX_ACCURACY (4) /* The maximum code length... (should be>=2) */
19
20#if !(PERL_REVISION >= 5 && PERL_VERSION >= 8)
21# define utf8n_to_uvchr utf8_to_uv
22#endif
23
ef0f5379
SP
24static char sv_soundex_table[0x100];
25static void sv_soundex_initialize (void)
26{
27 memset(&sv_soundex_table[0], '\0', sizeof(sv_soundex_table));
28 sv_soundex_table['A'] = '0';
29 sv_soundex_table['a'] = '0';
30 sv_soundex_table['E'] = '0';
31 sv_soundex_table['e'] = '0';
32 sv_soundex_table['H'] = '0';
33 sv_soundex_table['h'] = '0';
34 sv_soundex_table['I'] = '0';
35 sv_soundex_table['i'] = '0';
36 sv_soundex_table['O'] = '0';
37 sv_soundex_table['o'] = '0';
38 sv_soundex_table['U'] = '0';
39 sv_soundex_table['u'] = '0';
40 sv_soundex_table['W'] = '0';
41 sv_soundex_table['w'] = '0';
42 sv_soundex_table['Y'] = '0';
43 sv_soundex_table['y'] = '0';
44 sv_soundex_table['B'] = '1';
45 sv_soundex_table['b'] = '1';
46 sv_soundex_table['F'] = '1';
47 sv_soundex_table['f'] = '1';
48 sv_soundex_table['P'] = '1';
49 sv_soundex_table['p'] = '1';
50 sv_soundex_table['V'] = '1';
51 sv_soundex_table['v'] = '1';
52 sv_soundex_table['C'] = '2';
53 sv_soundex_table['c'] = '2';
54 sv_soundex_table['G'] = '2';
55 sv_soundex_table['g'] = '2';
56 sv_soundex_table['J'] = '2';
57 sv_soundex_table['j'] = '2';
58 sv_soundex_table['K'] = '2';
59 sv_soundex_table['k'] = '2';
60 sv_soundex_table['Q'] = '2';
61 sv_soundex_table['q'] = '2';
62 sv_soundex_table['S'] = '2';
63 sv_soundex_table['s'] = '2';
64 sv_soundex_table['X'] = '2';
65 sv_soundex_table['x'] = '2';
66 sv_soundex_table['Z'] = '2';
67 sv_soundex_table['z'] = '2';
68 sv_soundex_table['D'] = '3';
69 sv_soundex_table['d'] = '3';
70 sv_soundex_table['T'] = '3';
71 sv_soundex_table['t'] = '3';
72 sv_soundex_table['L'] = '4';
73 sv_soundex_table['l'] = '4';
74 sv_soundex_table['M'] = '5';
75 sv_soundex_table['m'] = '5';
76 sv_soundex_table['N'] = '5';
77 sv_soundex_table['n'] = '5';
78 sv_soundex_table['R'] = '6';
79 sv_soundex_table['r'] = '6';
80}
11f885b5 81
81a4c762 82static SV *sv_soundex (SV *source)
11f885b5
SP
83{
84 char *source_p;
85 char *source_end;
86
87 {
88 STRLEN source_len;
89 source_p = SvPV(source, source_len);
90 source_end = &source_p[source_len];
91 }
92
93 while (source_p != source_end)
94 {
ef0f5379
SP
95 char codepart_last = sv_soundex_table[(unsigned char) *source_p];
96
97 if (codepart_last != '\0')
11f885b5
SP
98 {
99 SV *code = newSV(SOUNDEX_ACCURACY);
100 char *code_p = SvPVX(code);
101 char *code_end = &code_p[SOUNDEX_ACCURACY];
11f885b5
SP
102
103 SvCUR_set(code, SOUNDEX_ACCURACY);
104 SvPOK_only(code);
105
ef0f5379 106 *code_p++ = toupper(*source_p++);
11f885b5
SP
107
108 while (source_p != source_end && code_p != code_end)
109 {
110 char c = *source_p++;
ef0f5379 111 char codepart = sv_soundex_table[(unsigned char) c];
11f885b5 112
ef0f5379
SP
113 if (codepart != '\0')
114 if (codepart != codepart_last && (codepart_last = codepart) != '0')
115 *code_p++ = codepart;
11f885b5
SP
116 }
117
118 while (code_p != code_end)
119 *code_p++ = '0';
120
121 *code_end = '\0';
122
123 return code;
124 }
125
126 source_p++;
127 }
128
129 return SvREFCNT_inc(perl_get_sv("Text::Soundex::nocode", FALSE));
130}
131
81a4c762 132static SV *sv_soundex_utf8 (SV* source)
11f885b5
SP
133{
134 U8 *source_p;
135 U8 *source_end;
136
137 {
138 STRLEN source_len;
139 source_p = (U8 *) SvPV(source, source_len);
140 source_end = &source_p[source_len];
141 }
142
143 while (source_p < source_end)
144 {
145 STRLEN offset;
146 UV c = utf8n_to_uvchr(source_p, source_end-source_p, &offset, 0);
ef0f5379 147 char codepart_last = (c <= 0xFF) ? sv_soundex_table[c] : '\0';
11f885b5
SP
148 source_p = (offset >= 1) ? &source_p[offset] : source_end;
149
ef0f5379 150 if (codepart_last != '\0')
11f885b5
SP
151 {
152 SV *code = newSV(SOUNDEX_ACCURACY);
153 char *code_p = SvPVX(code);
154 char *code_end = &code_p[SOUNDEX_ACCURACY];
11f885b5
SP
155
156 SvCUR_set(code, SOUNDEX_ACCURACY);
157 SvPOK_only(code);
158
ef0f5379 159 *code_p++ = toupper(c);
11f885b5
SP
160
161 while (source_p != source_end && code_p != code_end)
162 {
ef0f5379 163 char codepart;
11f885b5 164 c = utf8n_to_uvchr(source_p, source_end-source_p, &offset, 0);
ef0f5379 165 codepart = (c <= 0xFF) ? sv_soundex_table[c] : '\0';
11f885b5
SP
166 source_p = (offset >= 1) ? &source_p[offset] : source_end;
167
ef0f5379
SP
168 if (codepart != '\0')
169 if (codepart != codepart_last && (codepart_last = codepart) != '0')
170 *code_p++ = codepart;
11f885b5
SP
171 }
172
173 while (code_p != code_end)
174 *code_p++ = '0';
175
176 *code_end = '\0';
177
178 return code;
179 }
180
181 source_p++;
182 }
183
184 return SvREFCNT_inc(perl_get_sv("Text::Soundex::nocode", FALSE));
185}
186
187MODULE = Text::Soundex PACKAGE = Text::Soundex
188
189PROTOTYPES: DISABLE
190
191void
192soundex_xs (...)
ef0f5379
SP
193INIT:
194{
195 sv_soundex_initialize();
196}
11f885b5
SP
197PPCODE:
198{
199 int i;
200 for (i = 0; i < items; i++)
201 {
202 SV *sv = ST(i);
203
204 if (DO_UTF8(sv))
205 sv = sv_soundex_utf8(sv);
206 else
207 sv = sv_soundex(sv);
208
209 PUSHs(sv_2mortal(sv));
210 }
211}