2 * sdbm - ndbm work-alike hashed database library
3 * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
4 * author: oz@nexus.yorku.ca
5 * status: public domain.
23 # include <sys/file.h>
32 #include <errno.h> /* See notes in perl.h about avoiding
38 extern Malloc_t malloc(MEM_SIZE);
39 extern Free_t free(Malloc_t);
45 const datum nullitem = {0, 0};
50 static int getdbit(DBM *, long);
51 static int setdbit(DBM *, long);
52 static int getpage(DBM *, long);
53 static datum getnext(DBM *);
54 static int makroom(DBM *, long, int);
/*
 * File-local helper macros.
 *
 * bad(x)       - true when datum x has a NULL dptr or a negative dsize.
 * exhash(item) - hash of the datum's bytes, computed by sdbm_hash().
 * ioerr(db)    - sets DBM_IOERR in db->flags; the expression yields the
 *                updated flags word, so it can be used on the left of a
 *                comma operator in a return statement.
 */
#define bad(x)          ((x).dptr == NULL || (x).dsize < 0)
#define exhash(item)    sdbm_hash((item).dptr, (item).dsize)
#define ioerr(db)       ((db)->flags |= DBM_IOERR)

/*
 * OFF_PAG(off) / OFF_DIR(off) - byte offset of block number `off`, as
 * passed to lseek() on the page file and the directory file respectively.
 *
 * The argument is converted to long before the multiplication so the
 * product is computed in long arithmetic.  The whole expansion is
 * parenthesized so that the macro behaves as a single operand whatever
 * operators surround it (e.g. `x / OFF_PAG(n)` divides by the offset
 * instead of dividing by `n` and then multiplying by the block size).
 */
#define OFF_PAG(off)    ((long) (off) * PBLKSIZ)
#define OFF_DIR(off)    ((long) (off) * DBLKSIZ)
66 static const long masks[] = {
67 000000000000, 000000000001, 000000000003, 000000000007,
68 000000000017, 000000000037, 000000000077, 000000000177,
69 000000000377, 000000000777, 000000001777, 000000003777,
70 000000007777, 000000017777, 000000037777, 000000077777,
71 000000177777, 000000377777, 000000777777, 000001777777,
72 000003777777, 000007777777, 000017777777, 000037777777,
73 000077777777, 000177777777, 000377777777, 000777777777,
74 001777777777, 003777777777, 007777777777, 017777777777
78 sdbm_open(char *file, int flags, int mode)
84 const size_t dirfext_size = sizeof(DIRFEXT "");
85 const size_t pagfext_size = sizeof(PAGFEXT "");
87 if (file == NULL || !*file)
88 return errno = EINVAL, (DBM *) NULL;
90 * need space for two separate filenames
92 filelen = strlen(file);
94 if ((dirname = (char *) malloc(filelen + dirfext_size
95 + filelen + pagfext_size)) == NULL)
96 return errno = ENOMEM, (DBM *) NULL;
98 * build the file names
100 memcpy(dirname, file, filelen);
101 memcpy(dirname + filelen, DIRFEXT, dirfext_size);
102 pagname = dirname + filelen + dirfext_size;
103 memcpy(pagname, file, filelen);
104 memcpy(pagname + filelen, PAGFEXT, pagfext_size);
106 db = sdbm_prep(dirname, pagname, flags, mode);
107 free((char *) dirname);
112 sdbm_prep(char *dirname, char *pagname, int flags, int mode)
117 if ((db = (DBM *) malloc(sizeof(DBM))) == NULL)
118 return errno = ENOMEM, (DBM *) NULL;
125 * adjust user flags so that WRONLY becomes RDWR,
126 * as required by this package. Also set our internal
127 * flag for RDONLY if needed.
129 if (flags & O_WRONLY)
130 flags = (flags & ~O_WRONLY) | O_RDWR;
132 else if ((flags & 03) == O_RDONLY)
133 db->flags = DBM_RDONLY;
135 * open the files in sequence, and stat the dirfile.
136 * If we fail anywhere, undo everything, return NULL.
138 #if defined(OS2) || defined(MSDOS) || defined(WIN32) || defined(__CYGWIN__)
141 if ((db->pagf = open(pagname, flags, mode)) > -1) {
142 if ((db->dirf = open(dirname, flags, mode)) > -1) {
144 * need the dirfile size to establish max bit number.
146 if (fstat(db->dirf, &dstat) == 0) {
148 * zero size: either a fresh database, or one with a single,
149 * unsplit data page: dirpage is all zeros.
151 db->dirbno = (!dstat.st_size) ? 0 : -1;
153 db->maxbno = dstat.st_size * BYTESIZ;
155 (void) memset(db->pagbuf, 0, PBLKSIZ);
156 (void) memset(db->dirbuf, 0, DBLKSIZ);
162 (void) close(db->dirf);
164 (void) close(db->pagf);
176 (void) close(db->dirf);
177 (void) close(db->pagf);
183 sdbm_fetch(DBM *db, datum key)
185 if (db == NULL || bad(key))
186 return errno = EINVAL, nullitem;
188 if (getpage(db, exhash(key)))
189 return getpair(db->pagbuf, key);
191 return ioerr(db), nullitem;
195 sdbm_exists(DBM *db, datum key)
197 if (db == NULL || bad(key))
198 return errno = EINVAL, -1;
200 if (getpage(db, exhash(key)))
201 return exipair(db->pagbuf, key);
203 return ioerr(db), -1;
207 sdbm_delete(DBM *db, datum key)
209 if (db == NULL || bad(key))
210 return errno = EINVAL, -1;
212 return errno = EPERM, -1;
214 if (getpage(db, exhash(key))) {
215 if (!delpair(db->pagbuf, key))
218 * update the page file
220 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
221 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
222 return ioerr(db), -1;
227 return ioerr(db), -1;
231 sdbm_store(DBM *db, datum key, datum val, int flags)
236 if (db == NULL || bad(key))
237 return errno = EINVAL, -1;
239 return errno = EPERM, -1;
241 need = key.dsize + val.dsize;
243 * is the pair too big (or too small) for this database ??
245 if (need < 0 || need > PAIRMAX)
246 return errno = EINVAL, -1;
248 if (getpage(db, (hash = exhash(key)))) {
250 * if we need to replace, delete the key/data pair
251 * first. If it is not there, ignore.
253 if (flags == DBM_REPLACE)
254 (void) delpair(db->pagbuf, key);
256 else if (duppair(db->pagbuf, key))
260 * if we do not have enough room, we have to split.
262 if (!fitpair(db->pagbuf, need))
263 if (!makroom(db, hash, need))
264 return ioerr(db), -1;
266 * we have enough room or split is successful. insert the key,
267 * and update the page file.
269 (void) putpair(db->pagbuf, key, val);
271 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
272 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
273 return ioerr(db), -1;
280 return ioerr(db), -1;
284 * makroom - make room by splitting the overfull page
285 * this routine will attempt to make room for SPLTMAX times before
289 makroom(DBM *db, long int hash, int need)
293 #if defined(DOSISH) || defined(WIN32)
297 char *pag = db->pagbuf;
306 * split the current page
308 (void) splpage(pag, New, db->hmask + 1);
310 * address of the new page
312 newp = (hash & db->hmask) | (db->hmask + 1);
315 * write delay, read avoidance/cache shuffle:
316 * select the page for incoming pair: if key is to go to the new page,
317 * write out the previous one, and copy the new one over, thus making
318 * it the current page. If not, simply write the new page, and we are
319 * still looking at the page of interest. current page is not updated
320 * here, as sdbm_store will do so, after it inserts the incoming pair.
323 #if defined(DOSISH) || defined(WIN32)
325 * Fill hole with 0 if made it.
326 * (hole is NOT read as 0)
328 oldtail = lseek(db->pagf, 0L, SEEK_END);
329 memset(zer, 0, PBLKSIZ);
330 while (OFF_PAG(newp) > oldtail) {
331 if (lseek(db->pagf, 0L, SEEK_END) < 0 ||
332 write(db->pagf, zer, PBLKSIZ) < 0) {
339 if (hash & (db->hmask + 1)) {
340 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
341 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
344 (void) memcpy(pag, New, PBLKSIZ);
346 else if (lseek(db->pagf, OFF_PAG(newp), SEEK_SET) < 0
347 || write(db->pagf, New, PBLKSIZ) < 0)
350 if (!setdbit(db, db->curbit))
353 * see if we have enough room now
355 if (fitpair(pag, need))
358 * try again... update curbit and hmask as getpage would have
359 * done. because of our update of the current page, we do not
360 * need to read in anything. BUT we have to write the current
361 * [deferred] page out, as the window of failure is too great.
363 db->curbit = 2 * db->curbit +
364 ((hash & (db->hmask + 1)) ? 2 : 1);
365 db->hmask |= db->hmask + 1;
367 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
368 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
373 * if we are here, this is real bad news. After SPLTMAX splits,
374 * we still cannot fit the key. say goodnight.
377 rc = write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44);
378 /* PERL_UNUSED_VAR() or PERL_UNUSED_RESULT() would be
379 * useful here but that would mean pulling in perl.h */
387 * the following two routines will break if
388 * deletions aren't taken into account. (ndbm bug)
391 sdbm_firstkey(DBM *db)
394 return errno = EINVAL, nullitem;
398 if (lseek(db->pagf, OFF_PAG(0), SEEK_SET) < 0
399 || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
400 return ioerr(db), nullitem;
409 sdbm_nextkey(DBM *db)
412 return errno = EINVAL, nullitem;
417 * all important binary trie traversal
420 getpage(DBM *db, long int hash)
428 while (dbit < db->maxbno && getdbit(db, dbit))
429 dbit = 2 * dbit + ((hash & (1 << hbit++)) ? 2 : 1);
431 debug(("dbit: %d...", dbit));
434 db->hmask = masks[hbit];
436 pagb = hash & db->hmask;
438 * see if the block we need is already in memory.
439 * note: this lookaside cache has about 10% hit rate.
441 if (pagb != db->pagbno) {
443 * note: here, we assume a "hole" is read as 0s.
444 * if not, must zero pagbuf first.
446 if (lseek(db->pagf, OFF_PAG(pagb), SEEK_SET) < 0
447 || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
449 if (!chkpage(db->pagbuf))
453 debug(("pag read: %d\n", pagb));
459 getdbit(DBM *db, long int dbit)
467 if (dirb != db->dirbno) {
469 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
470 || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0)
473 memset(db->dirbuf,0,DBLKSIZ);
476 debug(("dir read: %d\n", dirb));
479 return db->dirbuf[c % DBLKSIZ] & (1 << dbit % BYTESIZ);
483 setdbit(DBM *db, long int dbit)
491 if (dirb != db->dirbno) {
493 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
494 || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0)
497 memset(db->dirbuf,0,DBLKSIZ);
500 debug(("dir read: %d\n", dirb));
503 db->dirbuf[c % DBLKSIZ] |= (1 << dbit % BYTESIZ);
506 if (dbit >= db->maxbno)
507 db->maxbno += DBLKSIZ * BYTESIZ;
509 if (OFF_DIR((dirb+1))*BYTESIZ > db->maxbno)
510 db->maxbno=OFF_DIR((dirb+1))*BYTESIZ;
513 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
514 || write(db->dirf, db->dirbuf, DBLKSIZ) < 0)
521 * getnext - get the next key in the page, and if done with
522 * the page, try the next page in sequence
531 key = getnkey(db->pagbuf, db->keyptr);
532 if (key.dptr != NULL)
535 * we either run out, or there is nothing on this page..
536 * try the next one... If we lost our position on the
537 * file, we will have to seek.
540 if (db->pagbno != db->blkptr++)
541 if (lseek(db->pagf, OFF_PAG(db->blkptr), SEEK_SET) < 0)
543 db->pagbno = db->blkptr;
544 if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0)
546 if (!chkpage(db->pagbuf))
550 return ioerr(db), nullitem;