Commit | Line | Data |
---|---|---|
463ee0b2 LW |
1 | /* |
2 | * sdbm - ndbm work-alike hashed database library | |
3 | * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978). | |
4 | * author: oz@nexus.yorku.ca | |
5 | * status: public domain. | |
6 | * | |
7 | * core routines | |
8 | */ | |
9 | ||
17f28c40 | 10 | #include "INTERN.h" |
85e6fe83 | 11 | #include "config.h" |
4f63d024 GS |
12 | #ifdef WIN32 |
13 | #include "io.h" | |
14 | #endif | |
463ee0b2 LW |
15 | #include "sdbm.h" |
16 | #include "tune.h" | |
17 | #include "pair.h" | |
18 | ||
85e6fe83 LW |
19 | #ifdef I_FCNTL |
20 | # include <fcntl.h> | |
463ee0b2 | 21 | #endif |
85e6fe83 LW |
22 | #ifdef I_SYS_FILE |
23 | # include <sys/file.h> | |
463ee0b2 LW |
24 | #endif |
25 | ||
d54fbe84 | 26 | #include <string.h> |
463ee0b2 LW |
27 | |
28 | /* | |
29 | * externals | |
30 | */ | |
81770b0c AD |
31 | |
32 | #include <errno.h> /* See notes in perl.h about avoiding | |
33 | extern int errno; */ | |
938f40b7 CB |
34 | #ifdef __cplusplus |
35 | extern "C" { | |
36 | #endif | |
463ee0b2 | 37 | |
3d97541c AC |
38 | extern Malloc_t malloc(MEM_SIZE); |
39 | extern Free_t free(Malloc_t); | |
bf0c440f | 40 | |
938f40b7 CB |
41 | #ifdef __cplusplus |
42 | } | |
43 | #endif | |
44 | ||
7b3bf35e JH |
45 | const datum nullitem = {0, 0}; |
46 | ||
463ee0b2 LW |
47 | /* |
48 | * forward | |
49 | */ | |
3d97541c AC |
50 | static int getdbit(DBM *, long); |
51 | static int setdbit(DBM *, long); | |
52 | static int getpage(DBM *, long); | |
53 | static datum getnext(DBM *); | |
54 | static int makroom(DBM *, long, int); | |
463ee0b2 LW |
55 | |
56 | /* | |
57 | * useful macros | |
58 | */ | |
59 | #define bad(x) ((x).dptr == NULL || (x).dsize < 0) | |
60 | #define exhash(item) sdbm_hash((item).dptr, (item).dsize) | |
61 | #define ioerr(db) ((db)->flags |= DBM_IOERR) | |
62 | ||
63 | #define OFF_PAG(off) (long) (off) * PBLKSIZ | |
64 | #define OFF_DIR(off) (long) (off) * DBLKSIZ | |
65 | ||
27da23d5 | 66 | static const long masks[] = { |
463ee0b2 LW |
67 | 000000000000, 000000000001, 000000000003, 000000000007, |
68 | 000000000017, 000000000037, 000000000077, 000000000177, | |
69 | 000000000377, 000000000777, 000000001777, 000000003777, | |
70 | 000000007777, 000000017777, 000000037777, 000000077777, | |
71 | 000000177777, 000000377777, 000000777777, 000001777777, | |
72 | 000003777777, 000007777777, 000017777777, 000037777777, | |
73 | 000077777777, 000177777777, 000377777777, 000777777777, | |
74 | 001777777777, 003777777777, 007777777777, 017777777777 | |
75 | }; | |
76 | ||
463ee0b2 | 77 | DBM * |
5aaab254 | 78 | sdbm_open(char *file, int flags, int mode) |
463ee0b2 | 79 | { |
5aaab254 KW |
80 | DBM *db; |
81 | char *dirname; | |
82 | char *pagname; | |
081f72ad | 83 | size_t filelen; |
acdbe25b RU |
84 | const size_t dirfext_size = sizeof(DIRFEXT ""); |
85 | const size_t pagfext_size = sizeof(PAGFEXT ""); | |
463ee0b2 LW |
86 | |
87 | if (file == NULL || !*file) | |
88 | return errno = EINVAL, (DBM *) NULL; | |
89 | /* | |
b7b1e41b | 90 | * need space for two separate filenames |
463ee0b2 | 91 | */ |
081f72ad | 92 | filelen = strlen(file); |
463ee0b2 | 93 | |
acdbe25b RU |
94 | if ((dirname = (char *) malloc(filelen + dirfext_size |
95 | + filelen + pagfext_size)) == NULL) | |
463ee0b2 LW |
96 | return errno = ENOMEM, (DBM *) NULL; |
97 | /* | |
98 | * build the file names | |
99 | */ | |
081f72ad | 100 | memcpy(dirname, file, filelen); |
acdbe25b RU |
101 | memcpy(dirname + filelen, DIRFEXT, dirfext_size); |
102 | pagname = dirname + filelen + dirfext_size; | |
081f72ad | 103 | memcpy(pagname, file, filelen); |
acdbe25b | 104 | memcpy(pagname + filelen, PAGFEXT, pagfext_size); |
463ee0b2 LW |
105 | |
106 | db = sdbm_prep(dirname, pagname, flags, mode); | |
107 | free((char *) dirname); | |
108 | return db; | |
109 | } | |
110 | ||
111 | DBM * | |
f0f333f4 | 112 | sdbm_prep(char *dirname, char *pagname, int flags, int mode) |
463ee0b2 | 113 | { |
5aaab254 | 114 | DBM *db; |
463ee0b2 LW |
115 | struct stat dstat; |
116 | ||
117 | if ((db = (DBM *) malloc(sizeof(DBM))) == NULL) | |
118 | return errno = ENOMEM, (DBM *) NULL; | |
119 | ||
120 | db->flags = 0; | |
121 | db->hmask = 0; | |
122 | db->blkptr = 0; | |
123 | db->keyptr = 0; | |
124 | /* | |
125 | * adjust user flags so that WRONLY becomes RDWR, | |
126 | * as required by this package. Also set our internal | |
127 | * flag for RDONLY if needed. | |
128 | */ | |
129 | if (flags & O_WRONLY) | |
130 | flags = (flags & ~O_WRONLY) | O_RDWR; | |
131 | ||
132 | else if ((flags & 03) == O_RDONLY) | |
133 | db->flags = DBM_RDONLY; | |
134 | /* | |
135 | * open the files in sequence, and stat the dirfile. | |
136 | * If we fail anywhere, undo everything, return NULL. | |
137 | */ | |
1761cee5 | 138 | #if defined(OS2) || defined(MSDOS) || defined(WIN32) || defined(__CYGWIN__) |
4633a7c4 LW |
139 | flags |= O_BINARY; |
140 | # endif | |
463ee0b2 LW |
141 | if ((db->pagf = open(pagname, flags, mode)) > -1) { |
142 | if ((db->dirf = open(dirname, flags, mode)) > -1) { | |
143 | /* | |
144 | * need the dirfile size to establish max bit number. | |
145 | */ | |
146 | if (fstat(db->dirf, &dstat) == 0) { | |
147 | /* | |
148 | * zero size: either a fresh database, or one with a single, | |
149 | * unsplit data page: dirpage is all zeros. | |
150 | */ | |
151 | db->dirbno = (!dstat.st_size) ? 0 : -1; | |
152 | db->pagbno = -1; | |
153 | db->maxbno = dstat.st_size * BYTESIZ; | |
154 | ||
155 | (void) memset(db->pagbuf, 0, PBLKSIZ); | |
156 | (void) memset(db->dirbuf, 0, DBLKSIZ); | |
157 | /* | |
158 | * success | |
159 | */ | |
160 | return db; | |
161 | } | |
162 | (void) close(db->dirf); | |
163 | } | |
164 | (void) close(db->pagf); | |
165 | } | |
166 | free((char *) db); | |
167 | return (DBM *) NULL; | |
168 | } | |
169 | ||
170 | void | |
5aaab254 | 171 | sdbm_close(DBM *db) |
463ee0b2 LW |
172 | { |
173 | if (db == NULL) | |
174 | errno = EINVAL; | |
175 | else { | |
176 | (void) close(db->dirf); | |
177 | (void) close(db->pagf); | |
178 | free((char *) db); | |
179 | } | |
180 | } | |
181 | ||
182 | datum | |
5aaab254 | 183 | sdbm_fetch(DBM *db, datum key) |
463ee0b2 LW |
184 | { |
185 | if (db == NULL || bad(key)) | |
186 | return errno = EINVAL, nullitem; | |
187 | ||
188 | if (getpage(db, exhash(key))) | |
189 | return getpair(db->pagbuf, key); | |
190 | ||
191 | return ioerr(db), nullitem; | |
192 | } | |
193 | ||
194 | int | |
5aaab254 | 195 | sdbm_exists(DBM *db, datum key) |
f4b9d880 RA |
196 | { |
197 | if (db == NULL || bad(key)) | |
198 | return errno = EINVAL, -1; | |
199 | ||
200 | if (getpage(db, exhash(key))) | |
201 | return exipair(db->pagbuf, key); | |
202 | ||
203 | return ioerr(db), -1; | |
204 | } | |
205 | ||
206 | int | |
5aaab254 | 207 | sdbm_delete(DBM *db, datum key) |
463ee0b2 LW |
208 | { |
209 | if (db == NULL || bad(key)) | |
210 | return errno = EINVAL, -1; | |
211 | if (sdbm_rdonly(db)) | |
212 | return errno = EPERM, -1; | |
213 | ||
214 | if (getpage(db, exhash(key))) { | |
215 | if (!delpair(db->pagbuf, key)) | |
216 | return -1; | |
217 | /* | |
218 | * update the page file | |
219 | */ | |
220 | if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 | |
221 | || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) | |
222 | return ioerr(db), -1; | |
223 | ||
224 | return 0; | |
225 | } | |
226 | ||
227 | return ioerr(db), -1; | |
228 | } | |
229 | ||
230 | int | |
5aaab254 | 231 | sdbm_store(DBM *db, datum key, datum val, int flags) |
463ee0b2 LW |
232 | { |
233 | int need; | |
5aaab254 | 234 | long hash; |
463ee0b2 LW |
235 | |
236 | if (db == NULL || bad(key)) | |
237 | return errno = EINVAL, -1; | |
238 | if (sdbm_rdonly(db)) | |
239 | return errno = EPERM, -1; | |
240 | ||
241 | need = key.dsize + val.dsize; | |
242 | /* | |
243 | * is the pair too big (or too small) for this database ?? | |
244 | */ | |
245 | if (need < 0 || need > PAIRMAX) | |
246 | return errno = EINVAL, -1; | |
247 | ||
248 | if (getpage(db, (hash = exhash(key)))) { | |
249 | /* | |
250 | * if we need to replace, delete the key/data pair | |
251 | * first. If it is not there, ignore. | |
252 | */ | |
253 | if (flags == DBM_REPLACE) | |
254 | (void) delpair(db->pagbuf, key); | |
255 | #ifdef SEEDUPS | |
256 | else if (duppair(db->pagbuf, key)) | |
257 | return 1; | |
258 | #endif | |
259 | /* | |
260 | * if we do not have enough room, we have to split. | |
261 | */ | |
262 | if (!fitpair(db->pagbuf, need)) | |
263 | if (!makroom(db, hash, need)) | |
264 | return ioerr(db), -1; | |
265 | /* | |
266 | * we have enough room or split is successful. insert the key, | |
267 | * and update the page file. | |
268 | */ | |
269 | (void) putpair(db->pagbuf, key, val); | |
270 | ||
271 | if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 | |
272 | || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) | |
273 | return ioerr(db), -1; | |
274 | /* | |
275 | * success | |
276 | */ | |
277 | return 0; | |
278 | } | |
279 | ||
280 | return ioerr(db), -1; | |
281 | } | |
282 | ||
283 | /* | |
284 | * makroom - make room by splitting the overfull page | |
285 | * this routine will attempt to make room for SPLTMAX times before | |
286 | * giving up. | |
287 | */ | |
288 | static int | |
5aaab254 | 289 | makroom(DBM *db, long int hash, int need) |
463ee0b2 LW |
290 | { |
291 | long newp; | |
292 | char twin[PBLKSIZ]; | |
f6bbbfc7 NS |
293 | #if defined(DOSISH) || defined(WIN32) |
294 | char zer[PBLKSIZ]; | |
295 | long oldtail; | |
296 | #endif | |
463ee0b2 | 297 | char *pag = db->pagbuf; |
f0f333f4 | 298 | char *New = twin; |
5aaab254 | 299 | int smax = SPLTMAX; |
04783dc7 DM |
300 | #ifdef BADMESS |
301 | int rc; | |
302 | #endif | |
463ee0b2 LW |
303 | |
304 | do { | |
305 | /* | |
306 | * split the current page | |
307 | */ | |
f0f333f4 | 308 | (void) splpage(pag, New, db->hmask + 1); |
463ee0b2 LW |
309 | /* |
310 | * address of the new page | |
311 | */ | |
312 | newp = (hash & db->hmask) | (db->hmask + 1); | |
313 | ||
314 | /* | |
b7b1e41b | 315 | * write delay, read avoidance/cache shuffle: |
463ee0b2 LW |
316 | * select the page for incoming pair: if key is to go to the new page, |
317 | * write out the previous one, and copy the new one over, thus making | |
318 | * it the current page. If not, simply write the new page, and we are | |
319 | * still looking at the page of interest. current page is not updated | |
320 | * here, as sdbm_store will do so, after it inserts the incoming pair. | |
321 | */ | |
f6bbbfc7 NS |
322 | |
323 | #if defined(DOSISH) || defined(WIN32) | |
324 | /* | |
325 | * Fill hole with 0 if made it. | |
326 | * (hole is NOT read as 0) | |
327 | */ | |
328 | oldtail = lseek(db->pagf, 0L, SEEK_END); | |
329 | memset(zer, 0, PBLKSIZ); | |
330 | while (OFF_PAG(newp) > oldtail) { | |
331 | if (lseek(db->pagf, 0L, SEEK_END) < 0 || | |
332 | write(db->pagf, zer, PBLKSIZ) < 0) { | |
333 | ||
334 | return 0; | |
335 | } | |
336 | oldtail += PBLKSIZ; | |
337 | } | |
338 | #endif | |
463ee0b2 LW |
339 | if (hash & (db->hmask + 1)) { |
340 | if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 | |
341 | || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) | |
342 | return 0; | |
343 | db->pagbno = newp; | |
f0f333f4 | 344 | (void) memcpy(pag, New, PBLKSIZ); |
463ee0b2 LW |
345 | } |
346 | else if (lseek(db->pagf, OFF_PAG(newp), SEEK_SET) < 0 | |
f0f333f4 | 347 | || write(db->pagf, New, PBLKSIZ) < 0) |
463ee0b2 LW |
348 | return 0; |
349 | ||
350 | if (!setdbit(db, db->curbit)) | |
351 | return 0; | |
352 | /* | |
353 | * see if we have enough room now | |
354 | */ | |
355 | if (fitpair(pag, need)) | |
356 | return 1; | |
357 | /* | |
358 | * try again... update curbit and hmask as getpage would have | |
359 | * done. because of our update of the current page, we do not | |
360 | * need to read in anything. BUT we have to write the current | |
361 | * [deferred] page out, as the window of failure is too great. | |
362 | */ | |
363 | db->curbit = 2 * db->curbit + | |
364 | ((hash & (db->hmask + 1)) ? 2 : 1); | |
365 | db->hmask |= db->hmask + 1; | |
366 | ||
367 | if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 | |
368 | || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) | |
369 | return 0; | |
370 | ||
371 | } while (--smax); | |
372 | /* | |
373 | * if we are here, this is real bad news. After SPLTMAX splits, | |
374 | * we still cannot fit the key. say goodnight. | |
375 | */ | |
376 | #ifdef BADMESS | |
04783dc7 | 377 | rc = write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44); |
b469f1e0 JH |
378 | /* PERL_UNUSED_VAR() or PERL_UNUSED_RESULT() would be |
379 | * useful here but that would mean pulling in perl.h */ | |
380 | (void)rc; | |
463ee0b2 LW |
381 | #endif |
382 | return 0; | |
383 | ||
384 | } | |
385 | ||
386 | /* | |
387 | * the following two routines will break if | |
388 | * deletions aren't taken into account. (ndbm bug) | |
389 | */ | |
390 | datum | |
5aaab254 | 391 | sdbm_firstkey(DBM *db) |
463ee0b2 LW |
392 | { |
393 | if (db == NULL) | |
394 | return errno = EINVAL, nullitem; | |
395 | /* | |
396 | * start at page 0 | |
397 | */ | |
398 | if (lseek(db->pagf, OFF_PAG(0), SEEK_SET) < 0 | |
399 | || read(db->pagf, db->pagbuf, PBLKSIZ) < 0) | |
400 | return ioerr(db), nullitem; | |
36a4593d TC |
401 | if (!chkpage(db->pagbuf)) { |
402 | errno = EINVAL; | |
403 | ioerr(db); | |
404 | db->pagbno = -1; | |
405 | return nullitem; | |
406 | } | |
463ee0b2 LW |
407 | db->pagbno = 0; |
408 | db->blkptr = 0; | |
409 | db->keyptr = 0; | |
410 | ||
411 | return getnext(db); | |
412 | } | |
413 | ||
414 | datum | |
5aaab254 | 415 | sdbm_nextkey(DBM *db) |
463ee0b2 LW |
416 | { |
417 | if (db == NULL) | |
418 | return errno = EINVAL, nullitem; | |
419 | return getnext(db); | |
420 | } | |
421 | ||
422 | /* | |
423 | * all important binary trie traversal | |
424 | */ | |
425 | static int | |
5aaab254 | 426 | getpage(DBM *db, long int hash) |
463ee0b2 | 427 | { |
5aaab254 KW |
428 | int hbit; |
429 | long dbit; | |
430 | long pagb; | |
463ee0b2 LW |
431 | |
432 | dbit = 0; | |
433 | hbit = 0; | |
434 | while (dbit < db->maxbno && getdbit(db, dbit)) | |
435 | dbit = 2 * dbit + ((hash & (1 << hbit++)) ? 2 : 1); | |
436 | ||
437 | debug(("dbit: %d...", dbit)); | |
438 | ||
439 | db->curbit = dbit; | |
440 | db->hmask = masks[hbit]; | |
441 | ||
442 | pagb = hash & db->hmask; | |
443 | /* | |
444 | * see if the block we need is already in memory. | |
445 | * note: this lookaside cache has about 10% hit rate. | |
446 | */ | |
447 | if (pagb != db->pagbno) { | |
448 | /* | |
449 | * note: here, we assume a "hole" is read as 0s. | |
450 | * if not, must zero pagbuf first. | |
451 | */ | |
452 | if (lseek(db->pagf, OFF_PAG(pagb), SEEK_SET) < 0 | |
453 | || read(db->pagf, db->pagbuf, PBLKSIZ) < 0) | |
454 | return 0; | |
36a4593d TC |
455 | if (!chkpage(db->pagbuf)) { |
456 | errno = EINVAL; | |
457 | db->pagbno = -1; | |
458 | ioerr(db); | |
459 | return 0; | |
460 | } | |
463ee0b2 LW |
461 | db->pagbno = pagb; |
462 | ||
463 | debug(("pag read: %d\n", pagb)); | |
464 | } | |
465 | return 1; | |
466 | } | |
467 | ||
468 | static int | |
5aaab254 | 469 | getdbit(DBM *db, long int dbit) |
463ee0b2 | 470 | { |
5aaab254 KW |
471 | long c; |
472 | long dirb; | |
463ee0b2 LW |
473 | |
474 | c = dbit / BYTESIZ; | |
475 | dirb = c / DBLKSIZ; | |
476 | ||
477 | if (dirb != db->dirbno) { | |
98627ae8 | 478 | int got; |
463ee0b2 | 479 | if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0 |
98627ae8 | 480 | || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0) |
463ee0b2 | 481 | return 0; |
98627ae8 GS |
482 | if (got==0) |
483 | memset(db->dirbuf,0,DBLKSIZ); | |
463ee0b2 LW |
484 | db->dirbno = dirb; |
485 | ||
486 | debug(("dir read: %d\n", dirb)); | |
487 | } | |
488 | ||
489 | return db->dirbuf[c % DBLKSIZ] & (1 << dbit % BYTESIZ); | |
490 | } | |
491 | ||
492 | static int | |
5aaab254 | 493 | setdbit(DBM *db, long int dbit) |
463ee0b2 | 494 | { |
5aaab254 KW |
495 | long c; |
496 | long dirb; | |
463ee0b2 LW |
497 | |
498 | c = dbit / BYTESIZ; | |
499 | dirb = c / DBLKSIZ; | |
500 | ||
501 | if (dirb != db->dirbno) { | |
98627ae8 | 502 | int got; |
463ee0b2 | 503 | if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0 |
98627ae8 | 504 | || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0) |
463ee0b2 | 505 | return 0; |
98627ae8 GS |
506 | if (got==0) |
507 | memset(db->dirbuf,0,DBLKSIZ); | |
463ee0b2 LW |
508 | db->dirbno = dirb; |
509 | ||
510 | debug(("dir read: %d\n", dirb)); | |
511 | } | |
512 | ||
513 | db->dirbuf[c % DBLKSIZ] |= (1 << dbit % BYTESIZ); | |
514 | ||
98627ae8 | 515 | #if 0 |
463ee0b2 LW |
516 | if (dbit >= db->maxbno) |
517 | db->maxbno += DBLKSIZ * BYTESIZ; | |
98627ae8 GS |
518 | #else |
519 | if (OFF_DIR((dirb+1))*BYTESIZ > db->maxbno) | |
520 | db->maxbno=OFF_DIR((dirb+1))*BYTESIZ; | |
521 | #endif | |
463ee0b2 LW |
522 | |
523 | if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0 | |
524 | || write(db->dirf, db->dirbuf, DBLKSIZ) < 0) | |
525 | return 0; | |
526 | ||
527 | return 1; | |
528 | } | |
529 | ||
530 | /* | |
531 | * getnext - get the next key in the page, and if done with | |
532 | * the page, try the next page in sequence | |
533 | */ | |
534 | static datum | |
5aaab254 | 535 | getnext(DBM *db) |
463ee0b2 LW |
536 | { |
537 | datum key; | |
538 | ||
539 | for (;;) { | |
540 | db->keyptr++; | |
541 | key = getnkey(db->pagbuf, db->keyptr); | |
542 | if (key.dptr != NULL) | |
543 | return key; | |
544 | /* | |
545 | * we either run out, or there is nothing on this page.. | |
546 | * try the next one... If we lost our position on the | |
547 | * file, we will have to seek. | |
548 | */ | |
549 | db->keyptr = 0; | |
550 | if (db->pagbno != db->blkptr++) | |
551 | if (lseek(db->pagf, OFF_PAG(db->blkptr), SEEK_SET) < 0) | |
552 | break; | |
553 | db->pagbno = db->blkptr; | |
554 | if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0) | |
555 | break; | |
36a4593d TC |
556 | if (!chkpage(db->pagbuf)) { |
557 | errno = EINVAL; | |
558 | db->pagbno = -1; | |
559 | ioerr(db); | |
560 | break; | |
561 | } | |
463ee0b2 LW |
562 | } |
563 | ||
564 | return ioerr(db), nullitem; | |
565 | } | |
85e6fe83 | 566 |