Commit | Line | Data |
---|---|---|
11f885b5 SP |
1 | /* -*- c -*- */ |
2 | ||
3 | /* (c) Copyright 1998-2003 by Mark Mielke | |
4 | * | |
5 | * Freedom to use these sources for whatever you want, as long as credit | |
6 | * is given where credit is due, is hereby granted. You may make modifications | |
7 | * where you see fit but leave this copyright somewhere visible. As well try | |
8 | * to initial any changes you make so that if i like the changes i can | |
9 | * incorporate them into any later versions of mine. | |
10 | * | |
11 | * - Mark Mielke <mark@mielke.cc> | |
12 | */ | |
13 | ||
14 | #include "EXTERN.h" | |
15 | #include "perl.h" | |
16 | #include "XSUB.h" | |
17 | ||
18 | #define SOUNDEX_ACCURACY (4) /* The maximum code length... (should be>=2) */ | |
19 | ||
20 | #if !(PERL_REVISION >= 5 && PERL_VERSION >= 8) | |
21 | # define utf8n_to_uvchr utf8_to_uv | |
22 | #endif | |
23 | ||
24 | static char *soundex_table = | |
25 | /*ABCDEFGHIJKLMNOPQRSTUVWXYZ*/ | |
26 | "01230120022455012623010202"; | |
27 | ||
81a4c762 | 28 | static SV *sv_soundex (SV *source) |
11f885b5 SP |
29 | { |
30 | char *source_p; | |
31 | char *source_end; | |
32 | ||
33 | { | |
34 | STRLEN source_len; | |
35 | source_p = SvPV(source, source_len); | |
36 | source_end = &source_p[source_len]; | |
37 | } | |
38 | ||
39 | while (source_p != source_end) | |
40 | { | |
41 | if ((*source_p & ~((UV) 0x7F)) == 0 && isalpha(*source_p)) | |
42 | { | |
43 | SV *code = newSV(SOUNDEX_ACCURACY); | |
44 | char *code_p = SvPVX(code); | |
45 | char *code_end = &code_p[SOUNDEX_ACCURACY]; | |
46 | char code_last; | |
47 | ||
48 | SvCUR_set(code, SOUNDEX_ACCURACY); | |
49 | SvPOK_only(code); | |
50 | ||
51 | code_last = soundex_table[(*code_p++ = toupper(*source_p++)) - 'A']; | |
52 | ||
53 | while (source_p != source_end && code_p != code_end) | |
54 | { | |
55 | char c = *source_p++; | |
56 | ||
57 | if ((c & ~((UV) 0x7F)) == 0 && isalpha(c)) | |
58 | { | |
59 | *code_p = soundex_table[toupper(c) - 'A']; | |
60 | if (*code_p != code_last && (code_last = *code_p) != '0') | |
61 | code_p++; | |
62 | } | |
63 | } | |
64 | ||
65 | while (code_p != code_end) | |
66 | *code_p++ = '0'; | |
67 | ||
68 | *code_end = '\0'; | |
69 | ||
70 | return code; | |
71 | } | |
72 | ||
73 | source_p++; | |
74 | } | |
75 | ||
76 | return SvREFCNT_inc(perl_get_sv("Text::Soundex::nocode", FALSE)); | |
77 | } | |
78 | ||
81a4c762 | 79 | static SV *sv_soundex_utf8 (SV* source) |
11f885b5 SP |
80 | { |
81 | U8 *source_p; | |
82 | U8 *source_end; | |
83 | ||
84 | { | |
85 | STRLEN source_len; | |
86 | source_p = (U8 *) SvPV(source, source_len); | |
87 | source_end = &source_p[source_len]; | |
88 | } | |
89 | ||
90 | while (source_p < source_end) | |
91 | { | |
92 | STRLEN offset; | |
93 | UV c = utf8n_to_uvchr(source_p, source_end-source_p, &offset, 0); | |
94 | source_p = (offset >= 1) ? &source_p[offset] : source_end; | |
95 | ||
96 | if ((c & ~((UV) 0x7F)) == 0 && isalpha(c)) | |
97 | { | |
98 | SV *code = newSV(SOUNDEX_ACCURACY); | |
99 | char *code_p = SvPVX(code); | |
100 | char *code_end = &code_p[SOUNDEX_ACCURACY]; | |
101 | char code_last; | |
102 | ||
103 | SvCUR_set(code, SOUNDEX_ACCURACY); | |
104 | SvPOK_only(code); | |
105 | ||
106 | code_last = soundex_table[(*code_p++ = toupper(c)) - 'A']; | |
107 | ||
108 | while (source_p != source_end && code_p != code_end) | |
109 | { | |
110 | c = utf8n_to_uvchr(source_p, source_end-source_p, &offset, 0); | |
111 | source_p = (offset >= 1) ? &source_p[offset] : source_end; | |
112 | ||
113 | if ((c & ~((UV) 0x7F)) == 0 && isalpha(c)) | |
114 | { | |
115 | *code_p = soundex_table[toupper(c) - 'A']; | |
116 | if (*code_p != code_last && (code_last = *code_p) != '0') | |
117 | code_p++; | |
118 | } | |
119 | } | |
120 | ||
121 | while (code_p != code_end) | |
122 | *code_p++ = '0'; | |
123 | ||
124 | *code_end = '\0'; | |
125 | ||
126 | return code; | |
127 | } | |
128 | ||
129 | source_p++; | |
130 | } | |
131 | ||
132 | return SvREFCNT_inc(perl_get_sv("Text::Soundex::nocode", FALSE)); | |
133 | } | |
134 | ||
135 | MODULE = Text::Soundex PACKAGE = Text::Soundex | |
136 | ||
137 | PROTOTYPES: DISABLE | |
138 | ||
139 | void | |
140 | soundex_xs (...) | |
141 | PPCODE: | |
142 | { | |
143 | int i; | |
144 | for (i = 0; i < items; i++) | |
145 | { | |
146 | SV *sv = ST(i); | |
147 | ||
148 | if (DO_UTF8(sv)) | |
149 | sv = sv_soundex_utf8(sv); | |
150 | else | |
151 | sv = sv_soundex(sv); | |
152 | ||
153 | PUSHs(sv_2mortal(sv)); | |
154 | } | |
155 | } |