This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Integrate:
[perl5.git] / ext / SDBM_File / sdbm / sdbm.c
CommitLineData
463ee0b2
LW
/*
 * sdbm - ndbm work-alike hashed database library
 * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
 * author: oz@nexus.yorku.ca
 * status: public domain.
 *
 * core routines
 */
9
17f28c40 10#include "INTERN.h"
85e6fe83 11#include "config.h"
4f63d024
GS
12#ifdef WIN32
13#include "io.h"
14#endif
463ee0b2
LW
15#include "sdbm.h"
16#include "tune.h"
17#include "pair.h"
18
85e6fe83
LW
19#ifdef I_FCNTL
20# include <fcntl.h>
463ee0b2 21#endif
85e6fe83
LW
22#ifdef I_SYS_FILE
23# include <sys/file.h>
463ee0b2
LW
24#endif
25
85e6fe83 26#ifdef I_STRING
89e79dea
JH
27# ifndef __ultrix__
28# include <string.h>
29# endif
85e6fe83
LW
30#else
31# include <strings.h>
463ee0b2
LW
32#endif
33
34/*
35 * externals
36 */
137443ea 37#ifndef WIN32
463ee0b2
LW
38#ifndef sun
39extern int errno;
40#endif
a5b1c163 41#endif
463ee0b2 42
85e6fe83 43extern Malloc_t malloc proto((MEM_SIZE));
851efeba 44extern Free_t free proto((Malloc_t));
bf0c440f 45
463ee0b2
LW
46/*
47 * forward
48 */
49static int getdbit proto((DBM *, long));
50static int setdbit proto((DBM *, long));
51static int getpage proto((DBM *, long));
52static datum getnext proto((DBM *));
53static int makroom proto((DBM *, long, int));
54
/*
 * useful macros
 *
 * bad(x)     - true when datum x has a NULL pointer or a negative size
 * exhash(x)  - hash of the bytes of datum x (via sdbm_hash)
 * ioerr(db)  - set the DBM_IOERR bit in db->flags
 */
#define bad(x)		((x).dptr == NULL || (x).dsize < 0)
#define exhash(item)	sdbm_hash((item).dptr, (item).dsize)
#define ioerr(db)	((db)->flags |= DBM_IOERR)

/*
 * byte offset of page / directory block number `off' in its file.
 * The whole expansion is parenthesized so the macros behave as a
 * single operand inside any surrounding expression.
 */
#define OFF_PAG(off)	((long) (off) * PBLKSIZ)
#define OFF_DIR(off)	((long) (off) * DBLKSIZ)
64
/*
 * masks[n] has the n low-order bits set (n = 0 .. 31).
 */
static long masks[] = {
    0x00000000, 0x00000001, 0x00000003, 0x00000007,
    0x0000000f, 0x0000001f, 0x0000003f, 0x0000007f,
    0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff,
    0x00000fff, 0x00001fff, 0x00003fff, 0x00007fff,
    0x0000ffff, 0x0001ffff, 0x0003ffff, 0x0007ffff,
    0x000fffff, 0x001fffff, 0x003fffff, 0x007fffff,
    0x00ffffff, 0x01ffffff, 0x03ffffff, 0x07ffffff,
    0x0fffffff, 0x1fffffff, 0x3fffffff, 0x7fffffff
};
75
463ee0b2 76DBM *
f0f333f4 77sdbm_open(register char *file, register int flags, register int mode)
463ee0b2
LW
78{
79 register DBM *db;
80 register char *dirname;
81 register char *pagname;
82 register int n;
83
84 if (file == NULL || !*file)
85 return errno = EINVAL, (DBM *) NULL;
86/*
87 * need space for two seperate filenames
88 */
89 n = strlen(file) * 2 + strlen(DIRFEXT) + strlen(PAGFEXT) + 2;
90
f0f333f4 91 if ((dirname = (char *) malloc((unsigned) n)) == NULL)
463ee0b2
LW
92 return errno = ENOMEM, (DBM *) NULL;
93/*
94 * build the file names
95 */
96 dirname = strcat(strcpy(dirname, file), DIRFEXT);
97 pagname = strcpy(dirname + strlen(dirname) + 1, file);
98 pagname = strcat(pagname, PAGFEXT);
99
100 db = sdbm_prep(dirname, pagname, flags, mode);
101 free((char *) dirname);
102 return db;
103}
104
105DBM *
f0f333f4 106sdbm_prep(char *dirname, char *pagname, int flags, int mode)
463ee0b2
LW
107{
108 register DBM *db;
109 struct stat dstat;
110
111 if ((db = (DBM *) malloc(sizeof(DBM))) == NULL)
112 return errno = ENOMEM, (DBM *) NULL;
113
114 db->flags = 0;
115 db->hmask = 0;
116 db->blkptr = 0;
117 db->keyptr = 0;
118/*
119 * adjust user flags so that WRONLY becomes RDWR,
120 * as required by this package. Also set our internal
121 * flag for RDONLY if needed.
122 */
123 if (flags & O_WRONLY)
124 flags = (flags & ~O_WRONLY) | O_RDWR;
125
126 else if ((flags & 03) == O_RDONLY)
127 db->flags = DBM_RDONLY;
128/*
129 * open the files in sequence, and stat the dirfile.
130 * If we fail anywhere, undo everything, return NULL.
131 */
1761cee5 132#if defined(OS2) || defined(MSDOS) || defined(WIN32) || defined(__CYGWIN__)
4633a7c4
LW
133 flags |= O_BINARY;
134# endif
463ee0b2
LW
135 if ((db->pagf = open(pagname, flags, mode)) > -1) {
136 if ((db->dirf = open(dirname, flags, mode)) > -1) {
137/*
138 * need the dirfile size to establish max bit number.
139 */
140 if (fstat(db->dirf, &dstat) == 0) {
141/*
142 * zero size: either a fresh database, or one with a single,
143 * unsplit data page: dirpage is all zeros.
144 */
145 db->dirbno = (!dstat.st_size) ? 0 : -1;
146 db->pagbno = -1;
147 db->maxbno = dstat.st_size * BYTESIZ;
148
149 (void) memset(db->pagbuf, 0, PBLKSIZ);
150 (void) memset(db->dirbuf, 0, DBLKSIZ);
151 /*
152 * success
153 */
154 return db;
155 }
156 (void) close(db->dirf);
157 }
158 (void) close(db->pagf);
159 }
160 free((char *) db);
161 return (DBM *) NULL;
162}
163
164void
f0f333f4 165sdbm_close(register DBM *db)
463ee0b2
LW
166{
167 if (db == NULL)
168 errno = EINVAL;
169 else {
170 (void) close(db->dirf);
171 (void) close(db->pagf);
172 free((char *) db);
173 }
174}
175
176datum
f0f333f4 177sdbm_fetch(register DBM *db, datum key)
463ee0b2
LW
178{
179 if (db == NULL || bad(key))
180 return errno = EINVAL, nullitem;
181
182 if (getpage(db, exhash(key)))
183 return getpair(db->pagbuf, key);
184
185 return ioerr(db), nullitem;
186}
187
188int
f4b9d880
RA
189sdbm_exists(register DBM *db, datum key)
190{
191 if (db == NULL || bad(key))
192 return errno = EINVAL, -1;
193
194 if (getpage(db, exhash(key)))
195 return exipair(db->pagbuf, key);
196
197 return ioerr(db), -1;
198}
199
200int
f0f333f4 201sdbm_delete(register DBM *db, datum key)
463ee0b2
LW
202{
203 if (db == NULL || bad(key))
204 return errno = EINVAL, -1;
205 if (sdbm_rdonly(db))
206 return errno = EPERM, -1;
207
208 if (getpage(db, exhash(key))) {
209 if (!delpair(db->pagbuf, key))
210 return -1;
211/*
212 * update the page file
213 */
214 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
215 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
216 return ioerr(db), -1;
217
218 return 0;
219 }
220
221 return ioerr(db), -1;
222}
223
224int
f0f333f4 225sdbm_store(register DBM *db, datum key, datum val, int flags)
463ee0b2
LW
226{
227 int need;
228 register long hash;
229
230 if (db == NULL || bad(key))
231 return errno = EINVAL, -1;
232 if (sdbm_rdonly(db))
233 return errno = EPERM, -1;
234
235 need = key.dsize + val.dsize;
236/*
237 * is the pair too big (or too small) for this database ??
238 */
239 if (need < 0 || need > PAIRMAX)
240 return errno = EINVAL, -1;
241
242 if (getpage(db, (hash = exhash(key)))) {
243/*
244 * if we need to replace, delete the key/data pair
245 * first. If it is not there, ignore.
246 */
247 if (flags == DBM_REPLACE)
248 (void) delpair(db->pagbuf, key);
249#ifdef SEEDUPS
250 else if (duppair(db->pagbuf, key))
251 return 1;
252#endif
253/*
254 * if we do not have enough room, we have to split.
255 */
256 if (!fitpair(db->pagbuf, need))
257 if (!makroom(db, hash, need))
258 return ioerr(db), -1;
259/*
260 * we have enough room or split is successful. insert the key,
261 * and update the page file.
262 */
263 (void) putpair(db->pagbuf, key, val);
264
265 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
266 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
267 return ioerr(db), -1;
268 /*
269 * success
270 */
271 return 0;
272 }
273
274 return ioerr(db), -1;
275}
276
277/*
278 * makroom - make room by splitting the overfull page
279 * this routine will attempt to make room for SPLTMAX times before
280 * giving up.
281 */
282static int
f0f333f4 283makroom(register DBM *db, long int hash, int need)
463ee0b2
LW
284{
285 long newp;
286 char twin[PBLKSIZ];
f6bbbfc7
NS
287#if defined(DOSISH) || defined(WIN32)
288 char zer[PBLKSIZ];
289 long oldtail;
290#endif
463ee0b2 291 char *pag = db->pagbuf;
f0f333f4 292 char *New = twin;
463ee0b2
LW
293 register int smax = SPLTMAX;
294
295 do {
296/*
297 * split the current page
298 */
f0f333f4 299 (void) splpage(pag, New, db->hmask + 1);
463ee0b2
LW
300/*
301 * address of the new page
302 */
303 newp = (hash & db->hmask) | (db->hmask + 1);
304
305/*
306 * write delay, read avoidence/cache shuffle:
307 * select the page for incoming pair: if key is to go to the new page,
308 * write out the previous one, and copy the new one over, thus making
309 * it the current page. If not, simply write the new page, and we are
310 * still looking at the page of interest. current page is not updated
311 * here, as sdbm_store will do so, after it inserts the incoming pair.
312 */
f6bbbfc7
NS
313
314#if defined(DOSISH) || defined(WIN32)
315 /*
316 * Fill hole with 0 if made it.
317 * (hole is NOT read as 0)
318 */
319 oldtail = lseek(db->pagf, 0L, SEEK_END);
320 memset(zer, 0, PBLKSIZ);
321 while (OFF_PAG(newp) > oldtail) {
322 if (lseek(db->pagf, 0L, SEEK_END) < 0 ||
323 write(db->pagf, zer, PBLKSIZ) < 0) {
324
325 return 0;
326 }
327 oldtail += PBLKSIZ;
328 }
329#endif
463ee0b2
LW
330 if (hash & (db->hmask + 1)) {
331 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
332 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
333 return 0;
334 db->pagbno = newp;
f0f333f4 335 (void) memcpy(pag, New, PBLKSIZ);
463ee0b2
LW
336 }
337 else if (lseek(db->pagf, OFF_PAG(newp), SEEK_SET) < 0
f0f333f4 338 || write(db->pagf, New, PBLKSIZ) < 0)
463ee0b2
LW
339 return 0;
340
341 if (!setdbit(db, db->curbit))
342 return 0;
343/*
344 * see if we have enough room now
345 */
346 if (fitpair(pag, need))
347 return 1;
348/*
349 * try again... update curbit and hmask as getpage would have
350 * done. because of our update of the current page, we do not
351 * need to read in anything. BUT we have to write the current
352 * [deferred] page out, as the window of failure is too great.
353 */
354 db->curbit = 2 * db->curbit +
355 ((hash & (db->hmask + 1)) ? 2 : 1);
356 db->hmask |= db->hmask + 1;
357
358 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
359 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
360 return 0;
361
362 } while (--smax);
363/*
364 * if we are here, this is real bad news. After SPLTMAX splits,
365 * we still cannot fit the key. say goodnight.
366 */
367#ifdef BADMESS
368 (void) write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44);
369#endif
370 return 0;
371
372}
373
374/*
375 * the following two routines will break if
376 * deletions aren't taken into account. (ndbm bug)
377 */
378datum
f0f333f4 379sdbm_firstkey(register DBM *db)
463ee0b2
LW
380{
381 if (db == NULL)
382 return errno = EINVAL, nullitem;
383/*
384 * start at page 0
385 */
386 if (lseek(db->pagf, OFF_PAG(0), SEEK_SET) < 0
387 || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
388 return ioerr(db), nullitem;
389 db->pagbno = 0;
390 db->blkptr = 0;
391 db->keyptr = 0;
392
393 return getnext(db);
394}
395
396datum
f0f333f4 397sdbm_nextkey(register DBM *db)
463ee0b2
LW
398{
399 if (db == NULL)
400 return errno = EINVAL, nullitem;
401 return getnext(db);
402}
403
404/*
405 * all important binary trie traversal
406 */
407static int
f0f333f4 408getpage(register DBM *db, register long int hash)
463ee0b2
LW
409{
410 register int hbit;
411 register long dbit;
412 register long pagb;
413
414 dbit = 0;
415 hbit = 0;
416 while (dbit < db->maxbno && getdbit(db, dbit))
417 dbit = 2 * dbit + ((hash & (1 << hbit++)) ? 2 : 1);
418
419 debug(("dbit: %d...", dbit));
420
421 db->curbit = dbit;
422 db->hmask = masks[hbit];
423
424 pagb = hash & db->hmask;
425/*
426 * see if the block we need is already in memory.
427 * note: this lookaside cache has about 10% hit rate.
428 */
429 if (pagb != db->pagbno) {
430/*
431 * note: here, we assume a "hole" is read as 0s.
432 * if not, must zero pagbuf first.
433 */
434 if (lseek(db->pagf, OFF_PAG(pagb), SEEK_SET) < 0
435 || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
436 return 0;
437 if (!chkpage(db->pagbuf))
438 return 0;
439 db->pagbno = pagb;
440
441 debug(("pag read: %d\n", pagb));
442 }
443 return 1;
444}
445
446static int
f0f333f4 447getdbit(register DBM *db, register long int dbit)
463ee0b2
LW
448{
449 register long c;
450 register long dirb;
451
452 c = dbit / BYTESIZ;
453 dirb = c / DBLKSIZ;
454
455 if (dirb != db->dirbno) {
98627ae8 456 int got;
463ee0b2 457 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
98627ae8 458 || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0)
463ee0b2 459 return 0;
98627ae8
GS
460 if (got==0)
461 memset(db->dirbuf,0,DBLKSIZ);
463ee0b2
LW
462 db->dirbno = dirb;
463
464 debug(("dir read: %d\n", dirb));
465 }
466
467 return db->dirbuf[c % DBLKSIZ] & (1 << dbit % BYTESIZ);
468}
469
470static int
f0f333f4 471setdbit(register DBM *db, register long int dbit)
463ee0b2
LW
472{
473 register long c;
474 register long dirb;
475
476 c = dbit / BYTESIZ;
477 dirb = c / DBLKSIZ;
478
479 if (dirb != db->dirbno) {
98627ae8 480 int got;
463ee0b2 481 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
98627ae8 482 || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0)
463ee0b2 483 return 0;
98627ae8
GS
484 if (got==0)
485 memset(db->dirbuf,0,DBLKSIZ);
463ee0b2
LW
486 db->dirbno = dirb;
487
488 debug(("dir read: %d\n", dirb));
489 }
490
491 db->dirbuf[c % DBLKSIZ] |= (1 << dbit % BYTESIZ);
492
98627ae8 493#if 0
463ee0b2
LW
494 if (dbit >= db->maxbno)
495 db->maxbno += DBLKSIZ * BYTESIZ;
98627ae8
GS
496#else
497 if (OFF_DIR((dirb+1))*BYTESIZ > db->maxbno)
498 db->maxbno=OFF_DIR((dirb+1))*BYTESIZ;
499#endif
463ee0b2
LW
500
501 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
502 || write(db->dirf, db->dirbuf, DBLKSIZ) < 0)
503 return 0;
504
505 return 1;
506}
507
508/*
509 * getnext - get the next key in the page, and if done with
510 * the page, try the next page in sequence
511 */
512static datum
f0f333f4 513getnext(register DBM *db)
463ee0b2
LW
514{
515 datum key;
516
517 for (;;) {
518 db->keyptr++;
519 key = getnkey(db->pagbuf, db->keyptr);
520 if (key.dptr != NULL)
521 return key;
522/*
523 * we either run out, or there is nothing on this page..
524 * try the next one... If we lost our position on the
525 * file, we will have to seek.
526 */
527 db->keyptr = 0;
528 if (db->pagbno != db->blkptr++)
529 if (lseek(db->pagf, OFF_PAG(db->blkptr), SEEK_SET) < 0)
530 break;
531 db->pagbno = db->blkptr;
532 if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0)
533 break;
534 if (!chkpage(db->pagbuf))
535 break;
536 }
537
538 return ioerr(db), nullitem;
539}
85e6fe83 540