This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
(perl # 132147) improve robustness against corrupt SDBM databases
[perl5.git] / ext / SDBM_File / sdbm.c
CommitLineData
463ee0b2
LW
1/*
2 * sdbm - ndbm work-alike hashed database library
3 * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
4 * author: oz@nexus.yorku.ca
5 * status: public domain.
6 *
7 * core routines
8 */
9
17f28c40 10#include "INTERN.h"
85e6fe83 11#include "config.h"
4f63d024
GS
12#ifdef WIN32
13#include "io.h"
14#endif
463ee0b2
LW
15#include "sdbm.h"
16#include "tune.h"
17#include "pair.h"
18
85e6fe83
LW
19#ifdef I_FCNTL
20# include <fcntl.h>
463ee0b2 21#endif
85e6fe83
LW
22#ifdef I_SYS_FILE
23# include <sys/file.h>
463ee0b2
LW
24#endif
25
d54fbe84 26#include <string.h>
463ee0b2
LW
27
28/*
29 * externals
30 */
81770b0c
AD
31
32#include <errno.h> /* See notes in perl.h about avoiding
33 extern int errno; */
938f40b7
CB
34#ifdef __cplusplus
35extern "C" {
36#endif
463ee0b2 37
3d97541c
AC
38extern Malloc_t malloc(MEM_SIZE);
39extern Free_t free(Malloc_t);
bf0c440f 40
938f40b7
CB
41#ifdef __cplusplus
42}
43#endif
44
7b3bf35e
JH
45const datum nullitem = {0, 0};
46
463ee0b2
LW
47/*
48 * forward
49 */
3d97541c
AC
50static int getdbit(DBM *, long);
51static int setdbit(DBM *, long);
52static int getpage(DBM *, long);
53static datum getnext(DBM *);
54static int makroom(DBM *, long, int);
463ee0b2
LW
55
56/*
57 * useful macros
58 */
59#define bad(x) ((x).dptr == NULL || (x).dsize < 0)
60#define exhash(item) sdbm_hash((item).dptr, (item).dsize)
61#define ioerr(db) ((db)->flags |= DBM_IOERR)
62
63#define OFF_PAG(off) (long) (off) * PBLKSIZ
64#define OFF_DIR(off) (long) (off) * DBLKSIZ
65
27da23d5 66static const long masks[] = {
463ee0b2
LW
67 000000000000, 000000000001, 000000000003, 000000000007,
68 000000000017, 000000000037, 000000000077, 000000000177,
69 000000000377, 000000000777, 000000001777, 000000003777,
70 000000007777, 000000017777, 000000037777, 000000077777,
71 000000177777, 000000377777, 000000777777, 000001777777,
72 000003777777, 000007777777, 000017777777, 000037777777,
73 000077777777, 000177777777, 000377777777, 000777777777,
74 001777777777, 003777777777, 007777777777, 017777777777
75};
76
463ee0b2 77DBM *
5aaab254 78sdbm_open(char *file, int flags, int mode)
463ee0b2 79{
5aaab254
KW
80 DBM *db;
81 char *dirname;
82 char *pagname;
081f72ad 83 size_t filelen;
acdbe25b
RU
84 const size_t dirfext_size = sizeof(DIRFEXT "");
85 const size_t pagfext_size = sizeof(PAGFEXT "");
463ee0b2
LW
86
87 if (file == NULL || !*file)
88 return errno = EINVAL, (DBM *) NULL;
89/*
b7b1e41b 90 * need space for two separate filenames
463ee0b2 91 */
081f72ad 92 filelen = strlen(file);
463ee0b2 93
acdbe25b
RU
94 if ((dirname = (char *) malloc(filelen + dirfext_size
95 + filelen + pagfext_size)) == NULL)
463ee0b2
LW
96 return errno = ENOMEM, (DBM *) NULL;
97/*
98 * build the file names
99 */
081f72ad 100 memcpy(dirname, file, filelen);
acdbe25b
RU
101 memcpy(dirname + filelen, DIRFEXT, dirfext_size);
102 pagname = dirname + filelen + dirfext_size;
081f72ad 103 memcpy(pagname, file, filelen);
acdbe25b 104 memcpy(pagname + filelen, PAGFEXT, pagfext_size);
463ee0b2
LW
105
106 db = sdbm_prep(dirname, pagname, flags, mode);
107 free((char *) dirname);
108 return db;
109}
110
111DBM *
f0f333f4 112sdbm_prep(char *dirname, char *pagname, int flags, int mode)
463ee0b2 113{
5aaab254 114 DBM *db;
463ee0b2
LW
115 struct stat dstat;
116
117 if ((db = (DBM *) malloc(sizeof(DBM))) == NULL)
118 return errno = ENOMEM, (DBM *) NULL;
119
120 db->flags = 0;
121 db->hmask = 0;
122 db->blkptr = 0;
123 db->keyptr = 0;
124/*
125 * adjust user flags so that WRONLY becomes RDWR,
126 * as required by this package. Also set our internal
127 * flag for RDONLY if needed.
128 */
129 if (flags & O_WRONLY)
130 flags = (flags & ~O_WRONLY) | O_RDWR;
131
132 else if ((flags & 03) == O_RDONLY)
133 db->flags = DBM_RDONLY;
134/*
135 * open the files in sequence, and stat the dirfile.
136 * If we fail anywhere, undo everything, return NULL.
137 */
1761cee5 138#if defined(OS2) || defined(MSDOS) || defined(WIN32) || defined(__CYGWIN__)
4633a7c4
LW
139 flags |= O_BINARY;
140# endif
463ee0b2
LW
141 if ((db->pagf = open(pagname, flags, mode)) > -1) {
142 if ((db->dirf = open(dirname, flags, mode)) > -1) {
143/*
144 * need the dirfile size to establish max bit number.
145 */
146 if (fstat(db->dirf, &dstat) == 0) {
147/*
148 * zero size: either a fresh database, or one with a single,
149 * unsplit data page: dirpage is all zeros.
150 */
151 db->dirbno = (!dstat.st_size) ? 0 : -1;
152 db->pagbno = -1;
153 db->maxbno = dstat.st_size * BYTESIZ;
154
155 (void) memset(db->pagbuf, 0, PBLKSIZ);
156 (void) memset(db->dirbuf, 0, DBLKSIZ);
157 /*
158 * success
159 */
160 return db;
161 }
162 (void) close(db->dirf);
163 }
164 (void) close(db->pagf);
165 }
166 free((char *) db);
167 return (DBM *) NULL;
168}
169
170void
5aaab254 171sdbm_close(DBM *db)
463ee0b2
LW
172{
173 if (db == NULL)
174 errno = EINVAL;
175 else {
176 (void) close(db->dirf);
177 (void) close(db->pagf);
178 free((char *) db);
179 }
180}
181
182datum
5aaab254 183sdbm_fetch(DBM *db, datum key)
463ee0b2
LW
184{
185 if (db == NULL || bad(key))
186 return errno = EINVAL, nullitem;
187
188 if (getpage(db, exhash(key)))
189 return getpair(db->pagbuf, key);
190
191 return ioerr(db), nullitem;
192}
193
194int
5aaab254 195sdbm_exists(DBM *db, datum key)
f4b9d880
RA
196{
197 if (db == NULL || bad(key))
198 return errno = EINVAL, -1;
199
200 if (getpage(db, exhash(key)))
201 return exipair(db->pagbuf, key);
202
203 return ioerr(db), -1;
204}
205
206int
5aaab254 207sdbm_delete(DBM *db, datum key)
463ee0b2
LW
208{
209 if (db == NULL || bad(key))
210 return errno = EINVAL, -1;
211 if (sdbm_rdonly(db))
212 return errno = EPERM, -1;
213
214 if (getpage(db, exhash(key))) {
215 if (!delpair(db->pagbuf, key))
216 return -1;
217/*
218 * update the page file
219 */
220 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
221 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
222 return ioerr(db), -1;
223
224 return 0;
225 }
226
227 return ioerr(db), -1;
228}
229
230int
5aaab254 231sdbm_store(DBM *db, datum key, datum val, int flags)
463ee0b2
LW
232{
233 int need;
5aaab254 234 long hash;
463ee0b2
LW
235
236 if (db == NULL || bad(key))
237 return errno = EINVAL, -1;
238 if (sdbm_rdonly(db))
239 return errno = EPERM, -1;
240
241 need = key.dsize + val.dsize;
242/*
243 * is the pair too big (or too small) for this database ??
244 */
245 if (need < 0 || need > PAIRMAX)
246 return errno = EINVAL, -1;
247
248 if (getpage(db, (hash = exhash(key)))) {
249/*
250 * if we need to replace, delete the key/data pair
251 * first. If it is not there, ignore.
252 */
253 if (flags == DBM_REPLACE)
254 (void) delpair(db->pagbuf, key);
255#ifdef SEEDUPS
256 else if (duppair(db->pagbuf, key))
257 return 1;
258#endif
259/*
260 * if we do not have enough room, we have to split.
261 */
262 if (!fitpair(db->pagbuf, need))
263 if (!makroom(db, hash, need))
264 return ioerr(db), -1;
265/*
266 * we have enough room or split is successful. insert the key,
267 * and update the page file.
268 */
269 (void) putpair(db->pagbuf, key, val);
270
271 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
272 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
273 return ioerr(db), -1;
274 /*
275 * success
276 */
277 return 0;
278 }
279
280 return ioerr(db), -1;
281}
282
283/*
284 * makroom - make room by splitting the overfull page
285 * this routine will attempt to make room for SPLTMAX times before
286 * giving up.
287 */
288static int
5aaab254 289makroom(DBM *db, long int hash, int need)
463ee0b2
LW
290{
291 long newp;
292 char twin[PBLKSIZ];
f6bbbfc7
NS
293#if defined(DOSISH) || defined(WIN32)
294 char zer[PBLKSIZ];
295 long oldtail;
296#endif
463ee0b2 297 char *pag = db->pagbuf;
f0f333f4 298 char *New = twin;
5aaab254 299 int smax = SPLTMAX;
04783dc7
DM
300#ifdef BADMESS
301 int rc;
302#endif
463ee0b2
LW
303
304 do {
305/*
306 * split the current page
307 */
f0f333f4 308 (void) splpage(pag, New, db->hmask + 1);
463ee0b2
LW
309/*
310 * address of the new page
311 */
312 newp = (hash & db->hmask) | (db->hmask + 1);
313
314/*
b7b1e41b 315 * write delay, read avoidance/cache shuffle:
463ee0b2
LW
316 * select the page for incoming pair: if key is to go to the new page,
317 * write out the previous one, and copy the new one over, thus making
318 * it the current page. If not, simply write the new page, and we are
319 * still looking at the page of interest. current page is not updated
320 * here, as sdbm_store will do so, after it inserts the incoming pair.
321 */
f6bbbfc7
NS
322
323#if defined(DOSISH) || defined(WIN32)
324 /*
325 * Fill hole with 0 if made it.
326 * (hole is NOT read as 0)
327 */
328 oldtail = lseek(db->pagf, 0L, SEEK_END);
329 memset(zer, 0, PBLKSIZ);
330 while (OFF_PAG(newp) > oldtail) {
331 if (lseek(db->pagf, 0L, SEEK_END) < 0 ||
332 write(db->pagf, zer, PBLKSIZ) < 0) {
333
334 return 0;
335 }
336 oldtail += PBLKSIZ;
337 }
338#endif
463ee0b2
LW
339 if (hash & (db->hmask + 1)) {
340 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
341 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
342 return 0;
343 db->pagbno = newp;
f0f333f4 344 (void) memcpy(pag, New, PBLKSIZ);
463ee0b2
LW
345 }
346 else if (lseek(db->pagf, OFF_PAG(newp), SEEK_SET) < 0
f0f333f4 347 || write(db->pagf, New, PBLKSIZ) < 0)
463ee0b2
LW
348 return 0;
349
350 if (!setdbit(db, db->curbit))
351 return 0;
352/*
353 * see if we have enough room now
354 */
355 if (fitpair(pag, need))
356 return 1;
357/*
358 * try again... update curbit and hmask as getpage would have
359 * done. because of our update of the current page, we do not
360 * need to read in anything. BUT we have to write the current
361 * [deferred] page out, as the window of failure is too great.
362 */
363 db->curbit = 2 * db->curbit +
364 ((hash & (db->hmask + 1)) ? 2 : 1);
365 db->hmask |= db->hmask + 1;
366
367 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
368 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
369 return 0;
370
371 } while (--smax);
372/*
373 * if we are here, this is real bad news. After SPLTMAX splits,
374 * we still cannot fit the key. say goodnight.
375 */
376#ifdef BADMESS
04783dc7 377 rc = write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44);
b469f1e0
JH
378 /* PERL_UNUSED_VAR() or PERL_UNUSED_RESULT() would be
379 * useful here but that would mean pulling in perl.h */
380 (void)rc;
463ee0b2
LW
381#endif
382 return 0;
383
384}
385
386/*
387 * the following two routines will break if
388 * deletions aren't taken into account. (ndbm bug)
389 */
390datum
5aaab254 391sdbm_firstkey(DBM *db)
463ee0b2
LW
392{
393 if (db == NULL)
394 return errno = EINVAL, nullitem;
395/*
396 * start at page 0
397 */
398 if (lseek(db->pagf, OFF_PAG(0), SEEK_SET) < 0
399 || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
400 return ioerr(db), nullitem;
36a4593d
TC
401 if (!chkpage(db->pagbuf)) {
402 errno = EINVAL;
403 ioerr(db);
404 db->pagbno = -1;
405 return nullitem;
406 }
463ee0b2
LW
407 db->pagbno = 0;
408 db->blkptr = 0;
409 db->keyptr = 0;
410
411 return getnext(db);
412}
413
414datum
5aaab254 415sdbm_nextkey(DBM *db)
463ee0b2
LW
416{
417 if (db == NULL)
418 return errno = EINVAL, nullitem;
419 return getnext(db);
420}
421
422/*
423 * all important binary trie traversal
424 */
425static int
5aaab254 426getpage(DBM *db, long int hash)
463ee0b2 427{
5aaab254
KW
428 int hbit;
429 long dbit;
430 long pagb;
463ee0b2
LW
431
432 dbit = 0;
433 hbit = 0;
434 while (dbit < db->maxbno && getdbit(db, dbit))
435 dbit = 2 * dbit + ((hash & (1 << hbit++)) ? 2 : 1);
436
437 debug(("dbit: %d...", dbit));
438
439 db->curbit = dbit;
440 db->hmask = masks[hbit];
441
442 pagb = hash & db->hmask;
443/*
444 * see if the block we need is already in memory.
445 * note: this lookaside cache has about 10% hit rate.
446 */
447 if (pagb != db->pagbno) {
448/*
449 * note: here, we assume a "hole" is read as 0s.
450 * if not, must zero pagbuf first.
451 */
452 if (lseek(db->pagf, OFF_PAG(pagb), SEEK_SET) < 0
453 || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
454 return 0;
36a4593d
TC
455 if (!chkpage(db->pagbuf)) {
456 errno = EINVAL;
457 db->pagbno = -1;
458 ioerr(db);
459 return 0;
460 }
463ee0b2
LW
461 db->pagbno = pagb;
462
463 debug(("pag read: %d\n", pagb));
464 }
465 return 1;
466}
467
468static int
5aaab254 469getdbit(DBM *db, long int dbit)
463ee0b2 470{
5aaab254
KW
471 long c;
472 long dirb;
463ee0b2
LW
473
474 c = dbit / BYTESIZ;
475 dirb = c / DBLKSIZ;
476
477 if (dirb != db->dirbno) {
98627ae8 478 int got;
463ee0b2 479 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
98627ae8 480 || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0)
463ee0b2 481 return 0;
98627ae8
GS
482 if (got==0)
483 memset(db->dirbuf,0,DBLKSIZ);
463ee0b2
LW
484 db->dirbno = dirb;
485
486 debug(("dir read: %d\n", dirb));
487 }
488
489 return db->dirbuf[c % DBLKSIZ] & (1 << dbit % BYTESIZ);
490}
491
492static int
5aaab254 493setdbit(DBM *db, long int dbit)
463ee0b2 494{
5aaab254
KW
495 long c;
496 long dirb;
463ee0b2
LW
497
498 c = dbit / BYTESIZ;
499 dirb = c / DBLKSIZ;
500
501 if (dirb != db->dirbno) {
98627ae8 502 int got;
463ee0b2 503 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
98627ae8 504 || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0)
463ee0b2 505 return 0;
98627ae8
GS
506 if (got==0)
507 memset(db->dirbuf,0,DBLKSIZ);
463ee0b2
LW
508 db->dirbno = dirb;
509
510 debug(("dir read: %d\n", dirb));
511 }
512
513 db->dirbuf[c % DBLKSIZ] |= (1 << dbit % BYTESIZ);
514
98627ae8 515#if 0
463ee0b2
LW
516 if (dbit >= db->maxbno)
517 db->maxbno += DBLKSIZ * BYTESIZ;
98627ae8
GS
518#else
519 if (OFF_DIR((dirb+1))*BYTESIZ > db->maxbno)
520 db->maxbno=OFF_DIR((dirb+1))*BYTESIZ;
521#endif
463ee0b2
LW
522
523 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
524 || write(db->dirf, db->dirbuf, DBLKSIZ) < 0)
525 return 0;
526
527 return 1;
528}
529
530/*
531 * getnext - get the next key in the page, and if done with
532 * the page, try the next page in sequence
533 */
534static datum
5aaab254 535getnext(DBM *db)
463ee0b2
LW
536{
537 datum key;
538
539 for (;;) {
540 db->keyptr++;
541 key = getnkey(db->pagbuf, db->keyptr);
542 if (key.dptr != NULL)
543 return key;
544/*
545 * we either run out, or there is nothing on this page..
546 * try the next one... If we lost our position on the
547 * file, we will have to seek.
548 */
549 db->keyptr = 0;
550 if (db->pagbno != db->blkptr++)
551 if (lseek(db->pagf, OFF_PAG(db->blkptr), SEEK_SET) < 0)
552 break;
553 db->pagbno = db->blkptr;
554 if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0)
555 break;
36a4593d
TC
556 if (!chkpage(db->pagbuf)) {
557 errno = EINVAL;
558 db->pagbno = -1;
559 ioerr(db);
560 break;
561 }
463ee0b2
LW
562 }
563
564 return ioerr(db), nullitem;
565}
85e6fe83 566