This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
[patch@31739] regop.t fix for VMS
[perl5.git] / ext / Text / Soundex / Soundex.xs
CommitLineData
11f885b5
SP
1/* -*- c -*- */
2
3/* (c) Copyright 1998-2003 by Mark Mielke
4 *
5 * Freedom to use these sources for whatever you want, as long as credit
6 * is given where credit is due, is hereby granted. You may make modifications
7 * where you see fit but leave this copyright somewhere visible. As well try
8 * to initial any changes you make so that if i like the changes i can
9 * incorporate them into any later versions of mine.
10 *
11 * - Mark Mielke <mark@mielke.cc>
12 */
13
14#include "EXTERN.h"
15#include "perl.h"
16#include "XSUB.h"
17
18#define SOUNDEX_ACCURACY (4) /* The maximum code length... (should be>=2) */
19
20#if !(PERL_REVISION >= 5 && PERL_VERSION >= 8)
21# define utf8n_to_uvchr utf8_to_uv
22#endif
23
24static char *soundex_table =
25 /*ABCDEFGHIJKLMNOPQRSTUVWXYZ*/
26 "01230120022455012623010202";
27
81a4c762 28static SV *sv_soundex (SV *source)
11f885b5
SP
29{
30 char *source_p;
31 char *source_end;
32
33 {
34 STRLEN source_len;
35 source_p = SvPV(source, source_len);
36 source_end = &source_p[source_len];
37 }
38
39 while (source_p != source_end)
40 {
41 if ((*source_p & ~((UV) 0x7F)) == 0 && isalpha(*source_p))
42 {
43 SV *code = newSV(SOUNDEX_ACCURACY);
44 char *code_p = SvPVX(code);
45 char *code_end = &code_p[SOUNDEX_ACCURACY];
46 char code_last;
47
48 SvCUR_set(code, SOUNDEX_ACCURACY);
49 SvPOK_only(code);
50
51 code_last = soundex_table[(*code_p++ = toupper(*source_p++)) - 'A'];
52
53 while (source_p != source_end && code_p != code_end)
54 {
55 char c = *source_p++;
56
57 if ((c & ~((UV) 0x7F)) == 0 && isalpha(c))
58 {
59 *code_p = soundex_table[toupper(c) - 'A'];
60 if (*code_p != code_last && (code_last = *code_p) != '0')
61 code_p++;
62 }
63 }
64
65 while (code_p != code_end)
66 *code_p++ = '0';
67
68 *code_end = '\0';
69
70 return code;
71 }
72
73 source_p++;
74 }
75
76 return SvREFCNT_inc(perl_get_sv("Text::Soundex::nocode", FALSE));
77}
78
81a4c762 79static SV *sv_soundex_utf8 (SV* source)
11f885b5
SP
80{
81 U8 *source_p;
82 U8 *source_end;
83
84 {
85 STRLEN source_len;
86 source_p = (U8 *) SvPV(source, source_len);
87 source_end = &source_p[source_len];
88 }
89
90 while (source_p < source_end)
91 {
92 STRLEN offset;
93 UV c = utf8n_to_uvchr(source_p, source_end-source_p, &offset, 0);
94 source_p = (offset >= 1) ? &source_p[offset] : source_end;
95
96 if ((c & ~((UV) 0x7F)) == 0 && isalpha(c))
97 {
98 SV *code = newSV(SOUNDEX_ACCURACY);
99 char *code_p = SvPVX(code);
100 char *code_end = &code_p[SOUNDEX_ACCURACY];
101 char code_last;
102
103 SvCUR_set(code, SOUNDEX_ACCURACY);
104 SvPOK_only(code);
105
106 code_last = soundex_table[(*code_p++ = toupper(c)) - 'A'];
107
108 while (source_p != source_end && code_p != code_end)
109 {
110 c = utf8n_to_uvchr(source_p, source_end-source_p, &offset, 0);
111 source_p = (offset >= 1) ? &source_p[offset] : source_end;
112
113 if ((c & ~((UV) 0x7F)) == 0 && isalpha(c))
114 {
115 *code_p = soundex_table[toupper(c) - 'A'];
116 if (*code_p != code_last && (code_last = *code_p) != '0')
117 code_p++;
118 }
119 }
120
121 while (code_p != code_end)
122 *code_p++ = '0';
123
124 *code_end = '\0';
125
126 return code;
127 }
128
129 source_p++;
130 }
131
132 return SvREFCNT_inc(perl_get_sv("Text::Soundex::nocode", FALSE));
133}
134
135MODULE = Text::Soundex PACKAGE = Text::Soundex
136
137PROTOTYPES: DISABLE
138
139void
140soundex_xs (...)
141PPCODE:
142{
143 int i;
144 for (i = 0; i < items; i++)
145 {
146 SV *sv = ST(i);
147
148 if (DO_UTF8(sv))
149 sv = sv_soundex_utf8(sv);
150 else
151 sv = sv_soundex(sv);
152
153 PUSHs(sv_2mortal(sv));
154 }
155}