This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Constant data in .c, not in a .h
[perl5.git] / ext / SDBM_File / sdbm.c
CommitLineData
463ee0b2
LW
1/*
2 * sdbm - ndbm work-alike hashed database library
3 * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
4 * author: oz@nexus.yorku.ca
5 * status: public domain.
6 *
7 * core routines
8 */
9
17f28c40 10#include "INTERN.h"
85e6fe83 11#include "config.h"
4f63d024
GS
12#ifdef WIN32
13#include "io.h"
14#endif
463ee0b2
LW
15#include "sdbm.h"
16#include "tune.h"
17#include "pair.h"
18
85e6fe83
LW
19#ifdef I_FCNTL
20# include <fcntl.h>
463ee0b2 21#endif
85e6fe83
LW
22#ifdef I_SYS_FILE
23# include <sys/file.h>
463ee0b2
LW
24#endif
25
85e6fe83 26#ifdef I_STRING
5518ecd4
JH
27# ifndef __ultrix__
28# include <string.h>
29# endif
85e6fe83
LW
30#else
31# include <strings.h>
463ee0b2
LW
32#endif
33
34/*
35 * externals
36 */
81770b0c
AD
37
38#include <errno.h> /* See notes in perl.h about avoiding
39 extern int errno; */
938f40b7
CB
40#ifdef __cplusplus
41extern "C" {
42#endif
463ee0b2 43
85e6fe83 44extern Malloc_t malloc proto((MEM_SIZE));
851efeba 45extern Free_t free proto((Malloc_t));
bf0c440f 46
938f40b7
CB
47#ifdef __cplusplus
48}
49#endif
50
7b3bf35e
JH
51const datum nullitem = {0, 0};
52
463ee0b2
LW
53/*
54 * forward
55 */
56static int getdbit proto((DBM *, long));
57static int setdbit proto((DBM *, long));
58static int getpage proto((DBM *, long));
59static datum getnext proto((DBM *));
60static int makroom proto((DBM *, long, int));
61
62/*
63 * useful macros
64 */
65#define bad(x) ((x).dptr == NULL || (x).dsize < 0)
66#define exhash(item) sdbm_hash((item).dptr, (item).dsize)
67#define ioerr(db) ((db)->flags |= DBM_IOERR)
68
69#define OFF_PAG(off) (long) (off) * PBLKSIZ
70#define OFF_DIR(off) (long) (off) * DBLKSIZ
71
27da23d5 72static const long masks[] = {
463ee0b2
LW
73 000000000000, 000000000001, 000000000003, 000000000007,
74 000000000017, 000000000037, 000000000077, 000000000177,
75 000000000377, 000000000777, 000000001777, 000000003777,
76 000000007777, 000000017777, 000000037777, 000000077777,
77 000000177777, 000000377777, 000000777777, 000001777777,
78 000003777777, 000007777777, 000017777777, 000037777777,
79 000077777777, 000177777777, 000377777777, 000777777777,
80 001777777777, 003777777777, 007777777777, 017777777777
81};
82
463ee0b2 83DBM *
5aaab254 84sdbm_open(char *file, int flags, int mode)
463ee0b2 85{
5aaab254
KW
86 DBM *db;
87 char *dirname;
88 char *pagname;
081f72ad 89 size_t filelen;
acdbe25b
RU
90 const size_t dirfext_size = sizeof(DIRFEXT "");
91 const size_t pagfext_size = sizeof(PAGFEXT "");
463ee0b2
LW
92
93 if (file == NULL || !*file)
94 return errno = EINVAL, (DBM *) NULL;
95/*
b7b1e41b 96 * need space for two separate filenames
463ee0b2 97 */
081f72ad 98 filelen = strlen(file);
463ee0b2 99
acdbe25b
RU
100 if ((dirname = (char *) malloc(filelen + dirfext_size
101 + filelen + pagfext_size)) == NULL)
463ee0b2
LW
102 return errno = ENOMEM, (DBM *) NULL;
103/*
104 * build the file names
105 */
081f72ad 106 memcpy(dirname, file, filelen);
acdbe25b
RU
107 memcpy(dirname + filelen, DIRFEXT, dirfext_size);
108 pagname = dirname + filelen + dirfext_size;
081f72ad 109 memcpy(pagname, file, filelen);
acdbe25b 110 memcpy(pagname + filelen, PAGFEXT, pagfext_size);
463ee0b2
LW
111
112 db = sdbm_prep(dirname, pagname, flags, mode);
113 free((char *) dirname);
114 return db;
115}
116
117DBM *
f0f333f4 118sdbm_prep(char *dirname, char *pagname, int flags, int mode)
463ee0b2 119{
5aaab254 120 DBM *db;
463ee0b2
LW
121 struct stat dstat;
122
123 if ((db = (DBM *) malloc(sizeof(DBM))) == NULL)
124 return errno = ENOMEM, (DBM *) NULL;
125
126 db->flags = 0;
127 db->hmask = 0;
128 db->blkptr = 0;
129 db->keyptr = 0;
130/*
131 * adjust user flags so that WRONLY becomes RDWR,
132 * as required by this package. Also set our internal
133 * flag for RDONLY if needed.
134 */
135 if (flags & O_WRONLY)
136 flags = (flags & ~O_WRONLY) | O_RDWR;
137
138 else if ((flags & 03) == O_RDONLY)
139 db->flags = DBM_RDONLY;
140/*
141 * open the files in sequence, and stat the dirfile.
142 * If we fail anywhere, undo everything, return NULL.
143 */
1761cee5 144#if defined(OS2) || defined(MSDOS) || defined(WIN32) || defined(__CYGWIN__)
4633a7c4
LW
145 flags |= O_BINARY;
146# endif
463ee0b2
LW
147 if ((db->pagf = open(pagname, flags, mode)) > -1) {
148 if ((db->dirf = open(dirname, flags, mode)) > -1) {
149/*
150 * need the dirfile size to establish max bit number.
151 */
152 if (fstat(db->dirf, &dstat) == 0) {
153/*
154 * zero size: either a fresh database, or one with a single,
155 * unsplit data page: dirpage is all zeros.
156 */
157 db->dirbno = (!dstat.st_size) ? 0 : -1;
158 db->pagbno = -1;
159 db->maxbno = dstat.st_size * BYTESIZ;
160
161 (void) memset(db->pagbuf, 0, PBLKSIZ);
162 (void) memset(db->dirbuf, 0, DBLKSIZ);
163 /*
164 * success
165 */
166 return db;
167 }
168 (void) close(db->dirf);
169 }
170 (void) close(db->pagf);
171 }
172 free((char *) db);
173 return (DBM *) NULL;
174}
175
176void
5aaab254 177sdbm_close(DBM *db)
463ee0b2
LW
178{
179 if (db == NULL)
180 errno = EINVAL;
181 else {
182 (void) close(db->dirf);
183 (void) close(db->pagf);
184 free((char *) db);
185 }
186}
187
188datum
5aaab254 189sdbm_fetch(DBM *db, datum key)
463ee0b2
LW
190{
191 if (db == NULL || bad(key))
192 return errno = EINVAL, nullitem;
193
194 if (getpage(db, exhash(key)))
195 return getpair(db->pagbuf, key);
196
197 return ioerr(db), nullitem;
198}
199
200int
5aaab254 201sdbm_exists(DBM *db, datum key)
f4b9d880
RA
202{
203 if (db == NULL || bad(key))
204 return errno = EINVAL, -1;
205
206 if (getpage(db, exhash(key)))
207 return exipair(db->pagbuf, key);
208
209 return ioerr(db), -1;
210}
211
212int
5aaab254 213sdbm_delete(DBM *db, datum key)
463ee0b2
LW
214{
215 if (db == NULL || bad(key))
216 return errno = EINVAL, -1;
217 if (sdbm_rdonly(db))
218 return errno = EPERM, -1;
219
220 if (getpage(db, exhash(key))) {
221 if (!delpair(db->pagbuf, key))
222 return -1;
223/*
224 * update the page file
225 */
226 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
227 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
228 return ioerr(db), -1;
229
230 return 0;
231 }
232
233 return ioerr(db), -1;
234}
235
236int
5aaab254 237sdbm_store(DBM *db, datum key, datum val, int flags)
463ee0b2
LW
238{
239 int need;
5aaab254 240 long hash;
463ee0b2
LW
241
242 if (db == NULL || bad(key))
243 return errno = EINVAL, -1;
244 if (sdbm_rdonly(db))
245 return errno = EPERM, -1;
246
247 need = key.dsize + val.dsize;
248/*
249 * is the pair too big (or too small) for this database ??
250 */
251 if (need < 0 || need > PAIRMAX)
252 return errno = EINVAL, -1;
253
254 if (getpage(db, (hash = exhash(key)))) {
255/*
256 * if we need to replace, delete the key/data pair
257 * first. If it is not there, ignore.
258 */
259 if (flags == DBM_REPLACE)
260 (void) delpair(db->pagbuf, key);
261#ifdef SEEDUPS
262 else if (duppair(db->pagbuf, key))
263 return 1;
264#endif
265/*
266 * if we do not have enough room, we have to split.
267 */
268 if (!fitpair(db->pagbuf, need))
269 if (!makroom(db, hash, need))
270 return ioerr(db), -1;
271/*
272 * we have enough room or split is successful. insert the key,
273 * and update the page file.
274 */
275 (void) putpair(db->pagbuf, key, val);
276
277 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
278 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
279 return ioerr(db), -1;
280 /*
281 * success
282 */
283 return 0;
284 }
285
286 return ioerr(db), -1;
287}
288
289/*
290 * makroom - make room by splitting the overfull page
291 * this routine will attempt to make room for SPLTMAX times before
292 * giving up.
293 */
294static int
5aaab254 295makroom(DBM *db, long int hash, int need)
463ee0b2
LW
296{
297 long newp;
298 char twin[PBLKSIZ];
f6bbbfc7 299#if defined(DOSISH) || defined(WIN32)
300 char zer[PBLKSIZ];
301 long oldtail;
302#endif
463ee0b2 303 char *pag = db->pagbuf;
f0f333f4 304 char *New = twin;
5aaab254 305 int smax = SPLTMAX;
04783dc7
DM
306#ifdef BADMESS
307 int rc;
308#endif
463ee0b2
LW
309
310 do {
311/*
312 * split the current page
313 */
f0f333f4 314 (void) splpage(pag, New, db->hmask + 1);
463ee0b2
LW
315/*
316 * address of the new page
317 */
318 newp = (hash & db->hmask) | (db->hmask + 1);
319
320/*
b7b1e41b 321 * write delay, read avoidance/cache shuffle:
463ee0b2
LW
322 * select the page for incoming pair: if key is to go to the new page,
323 * write out the previous one, and copy the new one over, thus making
324 * it the current page. If not, simply write the new page, and we are
325 * still looking at the page of interest. current page is not updated
326 * here, as sdbm_store will do so, after it inserts the incoming pair.
327 */
f6bbbfc7 328
329#if defined(DOSISH) || defined(WIN32)
330 /*
331 * Fill hole with 0 if made it.
332 * (hole is NOT read as 0)
333 */
334 oldtail = lseek(db->pagf, 0L, SEEK_END);
335 memset(zer, 0, PBLKSIZ);
336 while (OFF_PAG(newp) > oldtail) {
337 if (lseek(db->pagf, 0L, SEEK_END) < 0 ||
338 write(db->pagf, zer, PBLKSIZ) < 0) {
339
340 return 0;
341 }
342 oldtail += PBLKSIZ;
343 }
344#endif
463ee0b2
LW
345 if (hash & (db->hmask + 1)) {
346 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
347 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
348 return 0;
349 db->pagbno = newp;
f0f333f4 350 (void) memcpy(pag, New, PBLKSIZ);
463ee0b2
LW
351 }
352 else if (lseek(db->pagf, OFF_PAG(newp), SEEK_SET) < 0
f0f333f4 353 || write(db->pagf, New, PBLKSIZ) < 0)
463ee0b2
LW
354 return 0;
355
356 if (!setdbit(db, db->curbit))
357 return 0;
358/*
359 * see if we have enough room now
360 */
361 if (fitpair(pag, need))
362 return 1;
363/*
364 * try again... update curbit and hmask as getpage would have
365 * done. because of our update of the current page, we do not
366 * need to read in anything. BUT we have to write the current
367 * [deferred] page out, as the window of failure is too great.
368 */
369 db->curbit = 2 * db->curbit +
370 ((hash & (db->hmask + 1)) ? 2 : 1);
371 db->hmask |= db->hmask + 1;
372
373 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
374 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
375 return 0;
376
377 } while (--smax);
378/*
379 * if we are here, this is real bad news. After SPLTMAX splits,
380 * we still cannot fit the key. say goodnight.
381 */
382#ifdef BADMESS
04783dc7 383 rc = write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44);
b469f1e0
JH
384 /* PERL_UNUSED_VAR() or PERL_UNUSED_RESULT() would be
385 * useful here but that would mean pulling in perl.h */
386 (void)rc;
463ee0b2
LW
387#endif
388 return 0;
389
390}
391
392/*
393 * the following two routines will break if
394 * deletions aren't taken into account. (ndbm bug)
395 */
396datum
5aaab254 397sdbm_firstkey(DBM *db)
463ee0b2
LW
398{
399 if (db == NULL)
400 return errno = EINVAL, nullitem;
401/*
402 * start at page 0
403 */
404 if (lseek(db->pagf, OFF_PAG(0), SEEK_SET) < 0
405 || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
406 return ioerr(db), nullitem;
407 db->pagbno = 0;
408 db->blkptr = 0;
409 db->keyptr = 0;
410
411 return getnext(db);
412}
413
414datum
5aaab254 415sdbm_nextkey(DBM *db)
463ee0b2
LW
416{
417 if (db == NULL)
418 return errno = EINVAL, nullitem;
419 return getnext(db);
420}
421
422/*
423 * all important binary trie traversal
424 */
425static int
5aaab254 426getpage(DBM *db, long int hash)
463ee0b2 427{
5aaab254
KW
428 int hbit;
429 long dbit;
430 long pagb;
463ee0b2
LW
431
432 dbit = 0;
433 hbit = 0;
434 while (dbit < db->maxbno && getdbit(db, dbit))
435 dbit = 2 * dbit + ((hash & (1 << hbit++)) ? 2 : 1);
436
437 debug(("dbit: %d...", dbit));
438
439 db->curbit = dbit;
440 db->hmask = masks[hbit];
441
442 pagb = hash & db->hmask;
443/*
444 * see if the block we need is already in memory.
445 * note: this lookaside cache has about 10% hit rate.
446 */
447 if (pagb != db->pagbno) {
448/*
449 * note: here, we assume a "hole" is read as 0s.
450 * if not, must zero pagbuf first.
451 */
452 if (lseek(db->pagf, OFF_PAG(pagb), SEEK_SET) < 0
453 || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
454 return 0;
455 if (!chkpage(db->pagbuf))
456 return 0;
457 db->pagbno = pagb;
458
459 debug(("pag read: %d\n", pagb));
460 }
461 return 1;
462}
463
464static int
5aaab254 465getdbit(DBM *db, long int dbit)
463ee0b2 466{
5aaab254
KW
467 long c;
468 long dirb;
463ee0b2
LW
469
470 c = dbit / BYTESIZ;
471 dirb = c / DBLKSIZ;
472
473 if (dirb != db->dirbno) {
98627ae8 474 int got;
463ee0b2 475 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
98627ae8 476 || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0)
463ee0b2 477 return 0;
98627ae8
GS
478 if (got==0)
479 memset(db->dirbuf,0,DBLKSIZ);
463ee0b2
LW
480 db->dirbno = dirb;
481
482 debug(("dir read: %d\n", dirb));
483 }
484
485 return db->dirbuf[c % DBLKSIZ] & (1 << dbit % BYTESIZ);
486}
487
488static int
5aaab254 489setdbit(DBM *db, long int dbit)
463ee0b2 490{
5aaab254
KW
491 long c;
492 long dirb;
463ee0b2
LW
493
494 c = dbit / BYTESIZ;
495 dirb = c / DBLKSIZ;
496
497 if (dirb != db->dirbno) {
98627ae8 498 int got;
463ee0b2 499 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
98627ae8 500 || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0)
463ee0b2 501 return 0;
98627ae8
GS
502 if (got==0)
503 memset(db->dirbuf,0,DBLKSIZ);
463ee0b2
LW
504 db->dirbno = dirb;
505
506 debug(("dir read: %d\n", dirb));
507 }
508
509 db->dirbuf[c % DBLKSIZ] |= (1 << dbit % BYTESIZ);
510
98627ae8 511#if 0
463ee0b2
LW
512 if (dbit >= db->maxbno)
513 db->maxbno += DBLKSIZ * BYTESIZ;
98627ae8
GS
514#else
515 if (OFF_DIR((dirb+1))*BYTESIZ > db->maxbno)
516 db->maxbno=OFF_DIR((dirb+1))*BYTESIZ;
517#endif
463ee0b2
LW
518
519 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
520 || write(db->dirf, db->dirbuf, DBLKSIZ) < 0)
521 return 0;
522
523 return 1;
524}
525
526/*
527 * getnext - get the next key in the page, and if done with
528 * the page, try the next page in sequence
529 */
530static datum
5aaab254 531getnext(DBM *db)
463ee0b2
LW
532{
533 datum key;
534
535 for (;;) {
536 db->keyptr++;
537 key = getnkey(db->pagbuf, db->keyptr);
538 if (key.dptr != NULL)
539 return key;
540/*
541 * we either run out, or there is nothing on this page..
542 * try the next one... If we lost our position on the
543 * file, we will have to seek.
544 */
545 db->keyptr = 0;
546 if (db->pagbno != db->blkptr++)
547 if (lseek(db->pagf, OFF_PAG(db->blkptr), SEEK_SET) < 0)
548 break;
549 db->pagbno = db->blkptr;
550 if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0)
551 break;
552 if (!chkpage(db->pagbuf))
553 break;
554 }
555
556 return ioerr(db), nullitem;
557}
85e6fe83 558