This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Move FileCache from lib to ext
[perl5.git] / ext / DB_File / DB_File.pm
CommitLineData
a0d0e21e
LW
1# DB_File.pm -- Perl 5 interface to Berkeley DB
2#
6d02d21f 3# written by Paul Marquess (pmqs@cpan.org)
10261a36 4# last modified 28th October 2007
d8bf0b8c 5# version 1.818
36477c24 6#
d8bf0b8c 7# Copyright (c) 1995-2009 Paul Marquess. All rights reserved.
36477c24 8# This program is free software; you can redistribute it and/or
9# modify it under the same terms as Perl itself.
10
8e07c86e
AD
11
12package DB_File::HASHINFO ;
785da04d 13
e5021521 14require 5.00404;
610ab055 15
3245f058 16use warnings;
785da04d 17use strict;
8e07c86e 18use Carp;
88108326 19require Tie::Hash;
20@DB_File::HASHINFO::ISA = qw(Tie::Hash);
8e07c86e 21
88108326 22sub new
8e07c86e 23{
88108326 24 my $pkg = shift ;
25 my %x ;
26 tie %x, $pkg ;
27 bless \%x, $pkg ;
8e07c86e
AD
28}
29
610ab055 30
88108326 31sub TIEHASH
32{
33 my $pkg = shift ;
34
efc79c7d
PM
35 bless { VALID => {
36 bsize => 1,
37 ffactor => 1,
38 nelem => 1,
39 cachesize => 1,
40 hash => 2,
41 lorder => 1,
36477c24 42 },
43 GOT => {}
44 }, $pkg ;
88108326 45}
8e07c86e 46
610ab055 47
8e07c86e
AD
48sub FETCH
49{
88108326 50 my $self = shift ;
51 my $key = shift ;
8e07c86e 52
36477c24 53 return $self->{GOT}{$key} if exists $self->{VALID}{$key} ;
88108326 54
55 my $pkg = ref $self ;
56 croak "${pkg}::FETCH - Unknown element '$key'" ;
8e07c86e
AD
57}
58
59
60sub STORE
61{
88108326 62 my $self = shift ;
63 my $key = shift ;
64 my $value = shift ;
65
efc79c7d
PM
66 my $type = $self->{VALID}{$key};
67
68 if ( $type )
8e07c86e 69 {
efc79c7d
PM
70 croak "Key '$key' not associated with a code reference"
71 if $type == 2 && !ref $value && ref $value ne 'CODE';
36477c24 72 $self->{GOT}{$key} = $value ;
8e07c86e
AD
73 return ;
74 }
75
88108326 76 my $pkg = ref $self ;
77 croak "${pkg}::STORE - Unknown element '$key'" ;
8e07c86e
AD
78}
79
80sub DELETE
81{
88108326 82 my $self = shift ;
83 my $key = shift ;
84
36477c24 85 if ( exists $self->{VALID}{$key} )
8e07c86e 86 {
36477c24 87 delete $self->{GOT}{$key} ;
8e07c86e
AD
88 return ;
89 }
90
88108326 91 my $pkg = ref $self ;
92 croak "DB_File::HASHINFO::DELETE - Unknown element '$key'" ;
8e07c86e
AD
93}
94
88108326 95sub EXISTS
8e07c86e 96{
88108326 97 my $self = shift ;
98 my $key = shift ;
8e07c86e 99
36477c24 100 exists $self->{VALID}{$key} ;
8e07c86e
AD
101}
102
88108326 103sub NotHere
8e07c86e 104{
18d2dc8c 105 my $self = shift ;
88108326 106 my $method = shift ;
8e07c86e 107
18d2dc8c 108 croak ref($self) . " does not define the method ${method}" ;
8e07c86e
AD
109}
110
18d2dc8c
PM
111sub FIRSTKEY { my $self = shift ; $self->NotHere("FIRSTKEY") }
112sub NEXTKEY { my $self = shift ; $self->NotHere("NEXTKEY") }
113sub CLEAR { my $self = shift ; $self->NotHere("CLEAR") }
8e07c86e
AD
114
115package DB_File::RECNOINFO ;
785da04d 116
3245f058 117use warnings;
88108326 118use strict ;
119
045291aa 120@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e
AD
121
122sub TIEHASH
123{
88108326 124 my $pkg = shift ;
125
36477c24 126 bless { VALID => { map {$_, 1}
127 qw( bval cachesize psize flags lorder reclen bfname )
128 },
129 GOT => {},
130 }, $pkg ;
8e07c86e
AD
131}
132
88108326 133package DB_File::BTREEINFO ;
8e07c86e 134
3245f058 135use warnings;
88108326 136use strict ;
8e07c86e 137
88108326 138@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e 139
88108326 140sub TIEHASH
8e07c86e 141{
88108326 142 my $pkg = shift ;
143
efc79c7d
PM
144 bless { VALID => {
145 flags => 1,
146 cachesize => 1,
147 maxkeypage => 1,
148 minkeypage => 1,
149 psize => 1,
150 compare => 2,
151 prefix => 2,
152 lorder => 1,
36477c24 153 },
154 GOT => {},
155 }, $pkg ;
8e07c86e
AD
156}
157
158
8e07c86e 159package DB_File ;
785da04d 160
3245f058 161use warnings;
785da04d 162use strict;
07200f1b 163our ($VERSION, @ISA, @EXPORT, $AUTOLOAD, $DB_BTREE, $DB_HASH, $DB_RECNO);
c36891e2 164our ($db_version, $use_XSLoader, $splice_end_array, $Error);
8e07c86e
AD
165use Carp;
166
785da04d 167
a773d2c7 168$VERSION = "1.820" ;
083e9212 169$VERSION = eval $VERSION; # needed for dev releases
d85a743d
PM
170
171{
172 local $SIG{__WARN__} = sub {$splice_end_array = "@_";};
173 my @a =(1); splice(@a, 3);
174 $splice_end_array =
175 ($splice_end_array =~ /^splice\(\) offset past end of array at /);
176}
8e07c86e
AD
177
178#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
88108326 179$DB_BTREE = new DB_File::BTREEINFO ;
180$DB_HASH = new DB_File::HASHINFO ;
181$DB_RECNO = new DB_File::RECNOINFO ;
8e07c86e 182
785da04d 183require Tie::Hash;
8e07c86e
AD
184require Exporter;
185use AutoLoader;
b90e71be
GS
186BEGIN {
187 $use_XSLoader = 1 ;
e5021521 188 { local $SIG{__DIE__} ; eval { require XSLoader } ; }
b90e71be
GS
189
190 if ($@) {
191 $use_XSLoader = 0 ;
192 require DynaLoader;
193 @ISA = qw(DynaLoader);
194 }
195}
196
197push @ISA, qw(Tie::Hash Exporter);
8e07c86e
AD
198@EXPORT = qw(
199 $DB_BTREE $DB_HASH $DB_RECNO
88108326 200
8e07c86e
AD
201 BTREEMAGIC
202 BTREEVERSION
203 DB_LOCK
204 DB_SHMEM
205 DB_TXN
206 HASHMAGIC
207 HASHVERSION
208 MAX_PAGE_NUMBER
209 MAX_PAGE_OFFSET
210 MAX_REC_NUMBER
211 RET_ERROR
212 RET_SPECIAL
213 RET_SUCCESS
214 R_CURSOR
215 R_DUP
216 R_FIRST
217 R_FIXEDLEN
218 R_IAFTER
219 R_IBEFORE
220 R_LAST
221 R_NEXT
222 R_NOKEY
223 R_NOOVERWRITE
224 R_PREV
225 R_RECNOSYNC
226 R_SETCURSOR
227 R_SNAPSHOT
228 __R_UNUSED
88108326 229
045291aa 230);
8e07c86e
AD
231
232sub AUTOLOAD {
785da04d 233 my($constname);
8e07c86e 234 ($constname = $AUTOLOAD) =~ s/.*:://;
07200f1b
PM
235 my ($error, $val) = constant($constname);
236 Carp::croak $error if $error;
57c77851
JS
237 no strict 'refs';
238 *{$AUTOLOAD} = sub { $val };
239 goto &{$AUTOLOAD};
07200f1b 240}
8e07c86e 241
f6b705ef 242
a6ed719b 243eval {
1f70e1ea
PM
244 # Make all Fcntl O_XXX constants available for importing
245 require Fcntl;
246 my @O = grep /^O_/, @Fcntl::EXPORT;
247 Fcntl->import(@O); # first we import what we want to export
248 push(@EXPORT, @O);
a6ed719b 249};
f6b705ef 250
b90e71be
GS
251if ($use_XSLoader)
252 { XSLoader::load("DB_File", $VERSION)}
253else
254 { bootstrap DB_File $VERSION }
8e07c86e
AD
255
256# Preloaded methods go here. Autoload methods go after __END__, and are
257# processed by the autosplit program.
258
05475680 259sub tie_hash_or_array
610ab055
PM
260{
261 my (@arg) = @_ ;
05475680 262 my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ;
610ab055 263
f1aa208b
RGS
264 use File::Spec;
265 $arg[1] = File::Spec->rel2abs($arg[1])
266 if defined $arg[1] ;
267
610ab055
PM
268 $arg[4] = tied %{ $arg[4] }
269 if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ;
270
efc79c7d
PM
271 $arg[2] = O_CREAT()|O_RDWR() if @arg >=3 && ! defined $arg[2];
272 $arg[3] = 0666 if @arg >=4 && ! defined $arg[3];
273
9c095db2
PM
274 # make recno in Berkeley DB version 2 (or better) work like
275 # recno in version 1.
d6067fe3
SP
276 if ($db_version >= 4 and ! $tieHASH) {
277 $arg[2] |= O_CREAT();
278 }
279
1f70e1ea
PM
280 if ($db_version > 1 and defined $arg[4] and $arg[4] =~ /RECNO/ and
281 $arg[1] and ! -e $arg[1]) {
282 open(FH, ">$arg[1]") or return undef ;
283 close FH ;
284 chmod $arg[3] ? $arg[3] : 0666 , $arg[1] ;
285 }
286
05475680 287 DoTie_($tieHASH, @arg) ;
610ab055
PM
288}
289
05475680
PM
290sub TIEHASH
291{
292 tie_hash_or_array(@_) ;
293}
294
295sub TIEARRAY
296{
297 tie_hash_or_array(@_) ;
298}
88108326 299
045291aa
PM
300sub CLEAR
301{
1f70e1ea 302 my $self = shift;
3245f058 303 my $key = 0 ;
1f70e1ea
PM
304 my $value = "" ;
305 my $status = $self->seq($key, $value, R_FIRST());
306 my @keys;
307
308 while ($status == 0) {
309 push @keys, $key;
310 $status = $self->seq($key, $value, R_NEXT());
311 }
312 foreach $key (reverse @keys) {
313 my $s = $self->del($key);
314 }
315}
316
045291aa
PM
317sub EXTEND { }
318
319sub STORESIZE
320{
321 my $self = shift;
322 my $length = shift ;
323 my $current_length = $self->length() ;
324
325 if ($length < $current_length) {
326 my $key ;
327 for ($key = $current_length - 1 ; $key >= $length ; -- $key)
328 { $self->del($key) }
329 }
a9fd575d
PM
330 elsif ($length > $current_length) {
331 $self->put($length-1, "") ;
332 }
045291aa
PM
333}
334
c5da4faf
PM
335
336sub SPLICE
337{
338 my $self = shift;
339 my $offset = shift;
340 if (not defined $offset) {
d85a743d 341 warnings::warnif('uninitialized', 'Use of uninitialized value in splice');
c5da4faf
PM
342 $offset = 0;
343 }
344
345 my $length = @_ ? shift : 0;
346 # Carping about definedness comes _after_ the OFFSET sanity check.
347 # This is so we get the same error messages as Perl's splice().
348 #
349
350 my @list = @_;
351
352 my $size = $self->FETCHSIZE();
353
354 # 'If OFFSET is negative then it start that far from the end of
355 # the array.'
356 #
357 if ($offset < 0) {
358 my $new_offset = $size + $offset;
359 if ($new_offset < 0) {
360 die "Modification of non-creatable array value attempted, "
361 . "subscript $offset";
362 }
363 $offset = $new_offset;
364 }
365
c5da4faf 366 if (not defined $length) {
d85a743d 367 warnings::warnif('uninitialized', 'Use of uninitialized value in splice');
c5da4faf
PM
368 $length = 0;
369 }
370
d85a743d
PM
371 if ($offset > $size) {
372 $offset = $size;
373 warnings::warnif('misc', 'splice() offset past end of array')
374 if $splice_end_array;
375 }
376
c5da4faf
PM
377 # 'If LENGTH is omitted, removes everything from OFFSET onward.'
378 if (not defined $length) {
379 $length = $size - $offset;
380 }
381
382 # 'If LENGTH is negative, leave that many elements off the end of
383 # the array.'
384 #
385 if ($length < 0) {
386 $length = $size - $offset + $length;
387
388 if ($length < 0) {
389 # The user must have specified a length bigger than the
390 # length of the array passed in. But perl's splice()
391 # doesn't catch this, it just behaves as for length=0.
392 #
393 $length = 0;
394 }
395 }
396
397 if ($length > $size - $offset) {
398 $length = $size - $offset;
399 }
400
401 # $num_elems holds the current number of elements in the database.
402 my $num_elems = $size;
403
404 # 'Removes the elements designated by OFFSET and LENGTH from an
405 # array,'...
406 #
407 my @removed = ();
408 foreach (0 .. $length - 1) {
409 my $old;
410 my $status = $self->get($offset, $old);
411 if ($status != 0) {
412 my $msg = "error from Berkeley DB on get($offset, \$old)";
413 if ($status == 1) {
414 $msg .= ' (no such element?)';
415 }
416 else {
417 $msg .= ": error status $status";
418 if (defined $! and $! ne '') {
419 $msg .= ", message $!";
420 }
421 }
422 die $msg;
423 }
424 push @removed, $old;
425
426 $status = $self->del($offset);
427 if ($status != 0) {
428 my $msg = "error from Berkeley DB on del($offset)";
429 if ($status == 1) {
430 $msg .= ' (no such element?)';
431 }
432 else {
433 $msg .= ": error status $status";
434 if (defined $! and $! ne '') {
435 $msg .= ", message $!";
436 }
437 }
438 die $msg;
439 }
440
441 -- $num_elems;
442 }
443
444 # ...'and replaces them with the elements of LIST, if any.'
445 my $pos = $offset;
446 while (defined (my $elem = shift @list)) {
447 my $old_pos = $pos;
448 my $status;
449 if ($pos >= $num_elems) {
450 $status = $self->put($pos, $elem);
451 }
452 else {
453 $status = $self->put($pos, $elem, $self->R_IBEFORE);
454 }
455
456 if ($status != 0) {
457 my $msg = "error from Berkeley DB on put($pos, $elem, ...)";
458 if ($status == 1) {
459 $msg .= ' (no such element?)';
460 }
461 else {
462 $msg .= ", error status $status";
463 if (defined $! and $! ne '') {
464 $msg .= ", message $!";
465 }
466 }
467 die $msg;
468 }
469
470 die "pos unexpectedly changed from $old_pos to $pos with R_IBEFORE"
471 if $old_pos != $pos;
472
473 ++ $pos;
474 ++ $num_elems;
475 }
476
477 if (wantarray) {
478 # 'In list context, returns the elements removed from the
479 # array.'
480 #
481 return @removed;
482 }
483 elsif (defined wantarray and not wantarray) {
484 # 'In scalar context, returns the last element removed, or
485 # undef if no elements are removed.'
486 #
487 if (@removed) {
488 my $last = pop @removed;
489 return "$last";
490 }
491 else {
492 return undef;
493 }
494 }
495 elsif (not defined wantarray) {
496 # Void context
497 }
498 else { die }
499}
500sub ::DB_File::splice { &SPLICE }
501
6ca2e664
PM
502sub find_dup
503{
504 croak "Usage: \$db->find_dup(key,value)\n"
505 unless @_ == 3 ;
506
507 my $db = shift ;
508 my ($origkey, $value_wanted) = @_ ;
509 my ($key, $value) = ($origkey, 0);
510 my ($status) = 0 ;
511
512 for ($status = $db->seq($key, $value, R_CURSOR() ) ;
513 $status == 0 ;
514 $status = $db->seq($key, $value, R_NEXT() ) ) {
515
516 return 0 if $key eq $origkey and $value eq $value_wanted ;
517 }
518
519 return $status ;
520}
521
522sub del_dup
523{
524 croak "Usage: \$db->del_dup(key,value)\n"
525 unless @_ == 3 ;
526
527 my $db = shift ;
528 my ($key, $value) = @_ ;
529 my ($status) = $db->find_dup($key, $value) ;
530 return $status if $status != 0 ;
531
532 $status = $db->del($key, R_CURSOR() ) ;
533 return $status ;
534}
535
88108326 536sub get_dup
537{
538 croak "Usage: \$db->get_dup(key [,flag])\n"
539 unless @_ == 2 or @_ == 3 ;
540
541 my $db = shift ;
542 my $key = shift ;
543 my $flag = shift ;
f6b705ef 544 my $value = 0 ;
88108326 545 my $origkey = $key ;
546 my $wantarray = wantarray ;
f6b705ef 547 my %values = () ;
88108326 548 my @values = () ;
549 my $counter = 0 ;
f6b705ef 550 my $status = 0 ;
88108326 551
f6b705ef 552 # iterate through the database until either EOF ($status == 0)
553 # or a different key is encountered ($key ne $origkey).
554 for ($status = $db->seq($key, $value, R_CURSOR()) ;
555 $status == 0 and $key eq $origkey ;
556 $status = $db->seq($key, $value, R_NEXT()) ) {
88108326 557
f6b705ef 558 # save the value or count number of matches
559 if ($wantarray) {
560 if ($flag)
561 { ++ $values{$value} }
562 else
563 { push (@values, $value) }
564 }
565 else
566 { ++ $counter }
88108326 567
88108326 568 }
569
f6b705ef 570 return ($wantarray ? ($flag ? %values : @values) : $counter) ;
88108326 571}
572
573
8e07c86e
AD
5741;
575__END__
576
3b35bae3
AD
577=head1 NAME
578
1f70e1ea 579DB_File - Perl5 access to Berkeley DB version 1.x
3b35bae3
AD
580
581=head1 SYNOPSIS
582
bbc7dcd2
MS
583 use DB_File;
584
88108326 585 [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ;
586 [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ;
587 [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ;
760ac839 588
3b35bae3
AD
589 $status = $X->del($key [, $flags]) ;
590 $status = $X->put($key, $value [, $flags]) ;
591 $status = $X->get($key, $value [, $flags]) ;
760ac839 592 $status = $X->seq($key, $value, $flags) ;
3b35bae3
AD
593 $status = $X->sync([$flags]) ;
594 $status = $X->fd ;
760ac839 595
f6b705ef 596 # BTREE only
88108326 597 $count = $X->get_dup($key) ;
598 @list = $X->get_dup($key) ;
599 %list = $X->get_dup($key, 1) ;
6ca2e664
PM
600 $status = $X->find_dup($key, $value) ;
601 $status = $X->del_dup($key, $value) ;
88108326 602
f6b705ef 603 # RECNO only
604 $a = $X->length;
605 $a = $X->pop ;
606 $X->push(list);
607 $a = $X->shift;
608 $X->unshift(list);
c5da4faf 609 @r = $X->splice(offset, length, elements);
f6b705ef 610
cad2e5aa
JH
611 # DBM Filters
612 $old_filter = $db->filter_store_key ( sub { ... } ) ;
613 $old_filter = $db->filter_store_value( sub { ... } ) ;
614 $old_filter = $db->filter_fetch_key ( sub { ... } ) ;
615 $old_filter = $db->filter_fetch_value( sub { ... } ) ;
616
3b35bae3
AD
617 untie %hash ;
618 untie @array ;
619
620=head1 DESCRIPTION
621
8e07c86e 622B<DB_File> is a module which allows Perl programs to make use of the
1f70e1ea 623facilities provided by Berkeley DB version 1.x (if you have a newer
0d735f06 624version of DB, see L<Using DB_File with Berkeley DB version 2 or greater>).
039d031f
PM
625It is assumed that you have a copy of the Berkeley DB manual pages at
626hand when reading this documentation. The interface defined here
627mirrors the Berkeley DB interface closely.
68dc0745 628
8e07c86e
AD
629Berkeley DB is a C library which provides a consistent interface to a
630number of database formats. B<DB_File> provides an interface to all
631three of the database types currently supported by Berkeley DB.
3b35bae3
AD
632
633The file types are:
634
635=over 5
636
88108326 637=item B<DB_HASH>
3b35bae3 638
88108326 639This database type allows arbitrary key/value pairs to be stored in data
8e07c86e
AD
640files. This is equivalent to the functionality provided by other
641hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though,
642the files created using DB_HASH are not compatible with any of the
643other packages mentioned.
3b35bae3 644
8e07c86e
AD
645A default hashing algorithm, which will be adequate for most
646applications, is built into Berkeley DB. If you do need to use your own
647hashing algorithm it is possible to write your own in Perl and have
648B<DB_File> use it instead.
3b35bae3 649
88108326 650=item B<DB_BTREE>
651
652The btree format allows arbitrary key/value pairs to be stored in a
8e07c86e 653sorted, balanced binary tree.
3b35bae3 654
8e07c86e
AD
655As with the DB_HASH format, it is possible to provide a user defined
656Perl routine to perform the comparison of keys. By default, though, the
657keys are stored in lexical order.
3b35bae3 658
88108326 659=item B<DB_RECNO>
3b35bae3 660
8e07c86e
AD
661DB_RECNO allows both fixed-length and variable-length flat text files
662to be manipulated using the same key/value pair interface as in DB_HASH
663and DB_BTREE. In this case the key will consist of a record (line)
664number.
3b35bae3
AD
665
666=back
667
e5021521 668=head2 Using DB_File with Berkeley DB version 2 or greater
1f70e1ea
PM
669
670Although B<DB_File> is intended to be used with Berkeley DB version 1,
e5021521 671it can also be used with version 2, 3 or 4. In this case the interface is
1f70e1ea 672limited to the functionality provided by Berkeley DB 1.x. Anywhere the
e5021521 673version 2 or greater interface differs, B<DB_File> arranges for it to work
039d031f 674like version 1. This feature allows B<DB_File> scripts that were built
e5021521 675with version 1 to be migrated to version 2 or greater without any changes.
1f70e1ea
PM
676
677If you want to make use of the new features available in Berkeley DB
b90e71be 6782.x or greater, use the Perl module B<BerkeleyDB> instead.
1f70e1ea 679
e5021521
JH
680B<Note:> The database file format has changed multiple times in Berkeley
681DB version 2, 3 and 4. If you cannot recreate your databases, you
682must dump any existing databases with either the C<db_dump> or the
683C<db_dump185> utility that comes with Berkeley DB.
684Once you have rebuilt DB_File to use Berkeley DB version 2 or greater,
685your databases can be recreated using C<db_load>. Refer to the Berkeley DB
1f70e1ea
PM
686documentation for further details.
687
e5021521 688Please read L<"COPYRIGHT"> before using version 2.x or greater of Berkeley
039d031f 689DB with DB_File.
1f70e1ea 690
68dc0745 691=head2 Interface to Berkeley DB
3b35bae3
AD
692
693B<DB_File> allows access to Berkeley DB files using the tie() mechanism
8e07c86e
AD
694in Perl 5 (for full details, see L<perlfunc/tie()>). This facility
695allows B<DB_File> to access Berkeley DB files using either an
696associative array (for DB_HASH & DB_BTREE file types) or an ordinary
697array (for the DB_RECNO file type).
3b35bae3 698
88108326 699In addition to the tie() interface, it is also possible to access most
700of the functions provided in the Berkeley DB API directly.
f6b705ef 701See L<THE API INTERFACE>.
3b35bae3 702
88108326 703=head2 Opening a Berkeley DB Database File
3b35bae3 704
8e07c86e 705Berkeley DB uses the function dbopen() to open or create a database.
f6b705ef 706Here is the C prototype for dbopen():
3b35bae3
AD
707
708 DB*
709 dbopen (const char * file, int flags, int mode,
710 DBTYPE type, const void * openinfo)
711
712The parameter C<type> is an enumeration which specifies which of the 3
713interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used.
714Depending on which of these is actually chosen, the final parameter,
715I<openinfo> points to a data structure which allows tailoring of the
716specific interface method.
717
8e07c86e 718This interface is handled slightly differently in B<DB_File>. Here is
88108326 719an equivalent call using B<DB_File>:
3b35bae3 720
88108326 721 tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ;
3b35bae3 722
8e07c86e
AD
723The C<filename>, C<flags> and C<mode> parameters are the direct
724equivalent of their dbopen() counterparts. The final parameter $DB_HASH
725performs the function of both the C<type> and C<openinfo> parameters in
726dbopen().
3b35bae3 727
88108326 728In the example above $DB_HASH is actually a pre-defined reference to a
729hash object. B<DB_File> has three of these pre-defined references.
730Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO.
3b35bae3 731
8e07c86e
AD
732The keys allowed in each of these pre-defined references is limited to
733the names used in the equivalent C structure. So, for example, the
734$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>,
88108326 735C<ffactor>, C<hash>, C<lorder> and C<nelem>.
736
737To change one of these elements, just assign to it like this:
738
739 $DB_HASH->{'cachesize'} = 10000 ;
740
741The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are
742usually adequate for most applications. If you do need to create extra
743instances of these objects, constructors are available for each file
744type.
745
746Here are examples of the constructors and the valid options available
747for DB_HASH, DB_BTREE and DB_RECNO respectively.
748
749 $a = new DB_File::HASHINFO ;
750 $a->{'bsize'} ;
751 $a->{'cachesize'} ;
752 $a->{'ffactor'};
753 $a->{'hash'} ;
754 $a->{'lorder'} ;
755 $a->{'nelem'} ;
756
757 $b = new DB_File::BTREEINFO ;
758 $b->{'flags'} ;
759 $b->{'cachesize'} ;
760 $b->{'maxkeypage'} ;
761 $b->{'minkeypage'} ;
762 $b->{'psize'} ;
763 $b->{'compare'} ;
764 $b->{'prefix'} ;
765 $b->{'lorder'} ;
766
767 $c = new DB_File::RECNOINFO ;
768 $c->{'bval'} ;
769 $c->{'cachesize'} ;
770 $c->{'psize'} ;
771 $c->{'flags'} ;
772 $c->{'lorder'} ;
773 $c->{'reclen'} ;
774 $c->{'bfname'} ;
775
776The values stored in the hashes above are mostly the direct equivalent
777of their C counterpart. Like their C counterparts, all are set to a
f6b705ef 778default values - that means you don't have to set I<all> of the
88108326 779values when you only want to change one. Here is an example:
780
781 $a = new DB_File::HASHINFO ;
782 $a->{'cachesize'} = 12345 ;
783 tie %y, 'DB_File', "filename", $flags, 0777, $a ;
784
36477c24 785A few of the options need extra discussion here. When used, the C
88108326 786equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers
787to C functions. In B<DB_File> these keys are used to store references
788to Perl subs. Below are templates for each of the subs:
789
790 sub hash
791 {
792 my ($data) = @_ ;
793 ...
794 # return the hash value for $data
795 return $hash ;
796 }
3b35bae3 797
88108326 798 sub compare
799 {
800 my ($key, $key2) = @_ ;
801 ...
802 # return 0 if $key1 eq $key2
803 # -1 if $key1 lt $key2
804 # 1 if $key1 gt $key2
805 return (-1 , 0 or 1) ;
806 }
3b35bae3 807
88108326 808 sub prefix
809 {
810 my ($key, $key2) = @_ ;
811 ...
812 # return number of bytes of $key2 which are
813 # necessary to determine that it is greater than $key1
814 return $bytes ;
815 }
3b35bae3 816
f6b705ef 817See L<Changing the BTREE sort order> for an example of using the
818C<compare> template.
88108326 819
36477c24 820If you are using the DB_RECNO interface and you intend making use of
9a2c4ce3 821C<bval>, you should check out L<The 'bval' Option>.
36477c24 822
88108326 823=head2 Default Parameters
824
825It is possible to omit some or all of the final 4 parameters in the
826call to C<tie> and let them take default values. As DB_HASH is the most
827common file format used, the call:
828
829 tie %A, "DB_File", "filename" ;
830
831is equivalent to:
832
18d2dc8c 833 tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 834
835It is also possible to omit the filename parameter as well, so the
836call:
837
838 tie %A, "DB_File" ;
839
840is equivalent to:
841
18d2dc8c 842 tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 843
f6b705ef 844See L<In Memory Databases> for a discussion on the use of C<undef>
88108326 845in place of a filename.
846
f6b705ef 847=head2 In Memory Databases
848
849Berkeley DB allows the creation of in-memory databases by using NULL
850(that is, a C<(char *)0> in C) in place of the filename. B<DB_File>
851uses C<undef> instead of NULL to provide this functionality.
852
853=head1 DB_HASH
854
855The DB_HASH file format is probably the most commonly used of the three
856file formats that B<DB_File> supports. It is also very straightforward
857to use.
858
68dc0745 859=head2 A Simple Example
f6b705ef 860
861This example shows how to create a database, add key/value pairs to the
862database, delete keys/value pairs and finally how to enumerate the
863contents of the database.
864
3245f058 865 use warnings ;
610ab055 866 use strict ;
f6b705ef 867 use DB_File ;
07200f1b 868 our (%h, $k, $v) ;
f6b705ef 869
2c2d71f5 870 unlink "fruit" ;
45a340cb 871 tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0666, $DB_HASH
f6b705ef 872 or die "Cannot open file 'fruit': $!\n";
873
874 # Add a few key/value pairs to the file
875 $h{"apple"} = "red" ;
876 $h{"orange"} = "orange" ;
877 $h{"banana"} = "yellow" ;
878 $h{"tomato"} = "red" ;
879
880 # Check for existence of a key
881 print "Banana Exists\n\n" if $h{"banana"} ;
882
883 # Delete a key/value pair.
884 delete $h{"apple"} ;
885
886 # print the contents of the file
887 while (($k, $v) = each %h)
888 { print "$k -> $v\n" }
889
890 untie %h ;
891
892here is the output:
893
894 Banana Exists
bbc7dcd2 895
f6b705ef 896 orange -> orange
897 tomato -> red
898 banana -> yellow
899
900Note that the like ordinary associative arrays, the order of the keys
901retrieved is in an apparently random order.
902
903=head1 DB_BTREE
904
905The DB_BTREE format is useful when you want to store data in a given
906order. By default the keys will be stored in lexical order, but as you
907will see from the example shown in the next section, it is very easy to
908define your own sorting function.
909
910=head2 Changing the BTREE sort order
911
912This script shows how to override the default sorting algorithm that
913BTREE uses. Instead of using the normal lexical ordering, a case
914insensitive compare function will be used.
88108326 915
3245f058 916 use warnings ;
610ab055 917 use strict ;
f6b705ef 918 use DB_File ;
610ab055
PM
919
920 my %h ;
f6b705ef 921
922 sub Compare
923 {
924 my ($key1, $key2) = @_ ;
925 "\L$key1" cmp "\L$key2" ;
926 }
927
928 # specify the Perl sub that will do the comparison
929 $DB_BTREE->{'compare'} = \&Compare ;
930
2c2d71f5 931 unlink "tree" ;
45a340cb 932 tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0666, $DB_BTREE
f6b705ef 933 or die "Cannot open file 'tree': $!\n" ;
934
935 # Add a key/value pair to the file
936 $h{'Wall'} = 'Larry' ;
937 $h{'Smith'} = 'John' ;
938 $h{'mouse'} = 'mickey' ;
939 $h{'duck'} = 'donald' ;
940
941 # Delete
942 delete $h{"duck"} ;
943
944 # Cycle through the keys printing them in order.
945 # Note it is not necessary to sort the keys as
946 # the btree will have kept them in order automatically.
947 foreach (keys %h)
948 { print "$_\n" }
949
950 untie %h ;
951
952Here is the output from the code above.
953
954 mouse
955 Smith
956 Wall
957
958There are a few point to bear in mind if you want to change the
959ordering in a BTREE database:
960
961=over 5
962
963=item 1.
964
965The new compare function must be specified when you create the database.
966
967=item 2.
968
969You cannot change the ordering once the database has been created. Thus
970you must use the same compare function every time you access the
88108326 971database.
972
39793c41
PM
973=item 3
974
975Duplicate keys are entirely defined by the comparison function.
976In the case-insensitive example above, the keys: 'KEY' and 'key'
977would be considered duplicates, and assigning to the second one
52ffee89 978would overwrite the first. If duplicates are allowed for (with the
59e51af5 979R_DUP flag discussed below), only a single copy of duplicate keys
39793c41
PM
980is stored in the database --- so (again with example above) assigning
981three values to the keys: 'KEY', 'Key', and 'key' would leave just
982the first key: 'KEY' in the database with three values. For some
983situations this results in information loss, so care should be taken
984to provide fully qualified comparison functions when necessary.
985For example, the above comparison routine could be modified to
986additionally compare case-sensitively if two keys are equal in the
987case insensitive comparison:
988
989 sub compare {
990 my($key1, $key2) = @_;
991 lc $key1 cmp lc $key2 ||
992 $key1 cmp $key2;
993 }
994
995And now you will only have duplicates when the keys themselves
996are truly the same. (note: in versions of the db library prior to
997about November 1996, such duplicate keys were retained so it was
998possible to recover the original keys in sets of keys that
999compared as equal).
1000
1001
f6b705ef 1002=back
1003
68dc0745 1004=head2 Handling Duplicate Keys
f6b705ef 1005
1006The BTREE file type optionally allows a single key to be associated
1007with an arbitrary number of values. This option is enabled by setting
1008the flags element of C<$DB_BTREE> to R_DUP when creating the database.
1009
88108326 1010There are some difficulties in using the tied hash interface if you
1011want to manipulate a BTREE database with duplicate keys. Consider this
1012code:
1013
3245f058 1014 use warnings ;
610ab055 1015 use strict ;
88108326 1016 use DB_File ;
610ab055 1017
962cee9f 1018 my ($filename, %h) ;
610ab055 1019
88108326 1020 $filename = "tree" ;
1021 unlink $filename ;
bbc7dcd2 1022
88108326 1023 # Enable duplicate records
1024 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1025
45a340cb 1026 tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
88108326 1027 or die "Cannot open $filename: $!\n";
bbc7dcd2 1028
88108326 1029 # Add some key/value pairs to the file
1030 $h{'Wall'} = 'Larry' ;
1031 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 1032 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 1033 $h{'Smith'} = 'John' ;
1034 $h{'mouse'} = 'mickey' ;
1035
1036 # iterate through the associative array
1037 # and print each key/value pair.
2c2d71f5 1038 foreach (sort keys %h)
88108326 1039 { print "$_ -> $h{$_}\n" }
1040
f6b705ef 1041 untie %h ;
1042
88108326 1043Here is the output:
1044
1045 Smith -> John
1046 Wall -> Larry
1047 Wall -> Larry
f6b705ef 1048 Wall -> Larry
88108326 1049 mouse -> mickey
1050
f6b705ef 1051As you can see 3 records have been successfully created with key C<Wall>
88108326 1052- the only thing is, when they are retrieved from the database they
f6b705ef 1053I<seem> to have the same value, namely C<Larry>. The problem is caused
1054by the way that the associative array interface works. Basically, when
1055the associative array interface is used to fetch the value associated
1056with a given key, it will only ever retrieve the first value.
88108326 1057
1058Although it may not be immediately obvious from the code above, the
1059associative array interface can be used to write values with duplicate
1060keys, but it cannot be used to read them back from the database.
1061
1062The way to get around this problem is to use the Berkeley DB API method
1063called C<seq>. This method allows sequential access to key/value
f6b705ef 1064pairs. See L<THE API INTERFACE> for details of both the C<seq> method
1065and the API in general.
88108326 1066
1067Here is the script above rewritten using the C<seq> API method.
1068
3245f058 1069 use warnings ;
610ab055 1070 use strict ;
88108326 1071 use DB_File ;
bbc7dcd2 1072
962cee9f 1073 my ($filename, $x, %h, $status, $key, $value) ;
610ab055 1074
88108326 1075 $filename = "tree" ;
1076 unlink $filename ;
bbc7dcd2 1077
88108326 1078 # Enable duplicate records
1079 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1080
45a340cb 1081 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
88108326 1082 or die "Cannot open $filename: $!\n";
bbc7dcd2 1083
88108326 1084 # Add some key/value pairs to the file
1085 $h{'Wall'} = 'Larry' ;
1086 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 1087 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 1088 $h{'Smith'} = 'John' ;
1089 $h{'mouse'} = 'mickey' ;
bbc7dcd2 1090
f6b705ef 1091 # iterate through the btree using seq
88108326 1092 # and print each key/value pair.
610ab055 1093 $key = $value = 0 ;
f6b705ef 1094 for ($status = $x->seq($key, $value, R_FIRST) ;
1095 $status == 0 ;
1096 $status = $x->seq($key, $value, R_NEXT) )
88108326 1097 { print "$key -> $value\n" }
bbc7dcd2 1098
88108326 1099 undef $x ;
1100 untie %h ;
1101
1102that prints:
1103
1104 Smith -> John
1105 Wall -> Brick
f6b705ef 1106 Wall -> Brick
88108326 1107 Wall -> Larry
1108 mouse -> mickey
1109
f6b705ef 1110This time we have got all the key/value pairs, including the multiple
88108326 1111values associated with the key C<Wall>.
1112
6ca2e664
PM
1113To make life easier when dealing with duplicate keys, B<DB_File> comes with
1114a few utility methods.
1115
68dc0745 1116=head2 The get_dup() Method
f6b705ef 1117
6ca2e664 1118The C<get_dup> method assists in
88108326 1119reading duplicate values from BTREE databases. The method can take the
1120following forms:
1121
1122 $count = $x->get_dup($key) ;
1123 @list = $x->get_dup($key) ;
1124 %list = $x->get_dup($key, 1) ;
1125
1126In a scalar context the method returns the number of values associated
1127with the key, C<$key>.
1128
1129In list context, it returns all the values which match C<$key>. Note
f6b705ef 1130that the values will be returned in an apparently random order.
88108326 1131
7a2e2cd6 1132In list context, if the second parameter is present and evaluates
1133TRUE, the method returns an associative array. The keys of the
1134associative array correspond to the values that matched in the BTREE
1135and the values of the array are a count of the number of times that
1136particular value occurred in the BTREE.
88108326 1137
f6b705ef 1138So assuming the database created above, we can use C<get_dup> like
88108326 1139this:
1140
3245f058 1141 use warnings ;
2c2d71f5
JH
1142 use strict ;
1143 use DB_File ;
bbc7dcd2 1144
962cee9f 1145 my ($filename, $x, %h) ;
2c2d71f5
JH
1146
1147 $filename = "tree" ;
bbc7dcd2 1148
2c2d71f5
JH
1149 # Enable duplicate records
1150 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1151
45a340cb 1152 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
2c2d71f5
JH
1153 or die "Cannot open $filename: $!\n";
1154
610ab055 1155 my $cnt = $x->get_dup("Wall") ;
88108326 1156 print "Wall occurred $cnt times\n" ;
1157
610ab055 1158 my %hash = $x->get_dup("Wall", 1) ;
88108326 1159 print "Larry is there\n" if $hash{'Larry'} ;
f6b705ef 1160 print "There are $hash{'Brick'} Brick Walls\n" ;
88108326 1161
2c2d71f5 1162 my @list = sort $x->get_dup("Wall") ;
88108326 1163 print "Wall => [@list]\n" ;
1164
f6b705ef 1165 @list = $x->get_dup("Smith") ;
88108326 1166 print "Smith => [@list]\n" ;
bbc7dcd2 1167
f6b705ef 1168 @list = $x->get_dup("Dog") ;
88108326 1169 print "Dog => [@list]\n" ;
1170
1171
1172and it will print:
1173
f6b705ef 1174 Wall occurred 3 times
88108326 1175 Larry is there
f6b705ef 1176 There are 2 Brick Walls
1177 Wall => [Brick Brick Larry]
88108326 1178 Smith => [John]
1179 Dog => []
3b35bae3 1180
6ca2e664
PM
1181=head2 The find_dup() Method
1182
1183 $status = $X->find_dup($key, $value) ;
1184
b90e71be 1185This method checks for the existence of a specific key/value pair. If the
6ca2e664
PM
1186pair exists, the cursor is left pointing to the pair and the method
1187returns 0. Otherwise the method returns a non-zero value.
1188
1189Assuming the database from the previous example:
1190
3245f058 1191 use warnings ;
6ca2e664
PM
1192 use strict ;
1193 use DB_File ;
bbc7dcd2 1194
962cee9f 1195 my ($filename, $x, %h, $found) ;
6ca2e664 1196
07200f1b 1197 $filename = "tree" ;
bbc7dcd2 1198
6ca2e664
PM
1199 # Enable duplicate records
1200 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1201
45a340cb 1202 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
6ca2e664
PM
1203 or die "Cannot open $filename: $!\n";
1204
1205 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
1206 print "Larry Wall is $found there\n" ;
bbc7dcd2 1207
6ca2e664
PM
1208 $found = ( $x->find_dup("Wall", "Harry") == 0 ? "" : "not") ;
1209 print "Harry Wall is $found there\n" ;
bbc7dcd2 1210
6ca2e664
PM
1211 undef $x ;
1212 untie %h ;
1213
1214prints this
1215
2c2d71f5 1216 Larry Wall is there
6ca2e664
PM
1217 Harry Wall is not there
1218
1219
1220=head2 The del_dup() Method
1221
1222 $status = $X->del_dup($key, $value) ;
1223
1224This method deletes a specific key/value pair. It returns
12250 if they exist and have been deleted successfully.
1226Otherwise the method returns a non-zero value.
1227
b90e71be 1228Again assuming the existence of the C<tree> database
6ca2e664 1229
3245f058 1230 use warnings ;
6ca2e664
PM
1231 use strict ;
1232 use DB_File ;
bbc7dcd2 1233
962cee9f 1234 my ($filename, $x, %h, $found) ;
6ca2e664 1235
07200f1b 1236 $filename = "tree" ;
bbc7dcd2 1237
6ca2e664
PM
1238 # Enable duplicate records
1239 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1240
45a340cb 1241 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
6ca2e664
PM
1242 or die "Cannot open $filename: $!\n";
1243
1244 $x->del_dup("Wall", "Larry") ;
1245
1246 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
1247 print "Larry Wall is $found there\n" ;
bbc7dcd2 1248
6ca2e664
PM
1249 undef $x ;
1250 untie %h ;
1251
1252prints this
1253
1254 Larry Wall is not there
1255
f6b705ef 1256=head2 Matching Partial Keys
1257
1258The BTREE interface has a feature which allows partial keys to be
1259matched. This functionality is I<only> available when the C<seq> method
1260is used along with the R_CURSOR flag.
1261
1262 $x->seq($key, $value, R_CURSOR) ;
1263
1264Here is the relevant quote from the dbopen man page where it defines
1265the use of the R_CURSOR flag with seq:
1266
f6b705ef 1267 Note, for the DB_BTREE access method, the returned key is not
1268 necessarily an exact match for the specified key. The returned key
1269 is the smallest key greater than or equal to the specified key,
1270 permitting partial key matches and range searches.
1271
f6b705ef 1272In the example script below, the C<match> sub uses this feature to find
1273and print the first matching key/value pair given a partial key.
1274
3245f058 1275 use warnings ;
610ab055 1276 use strict ;
f6b705ef 1277 use DB_File ;
1278 use Fcntl ;
610ab055 1279
962cee9f 1280 my ($filename, $x, %h, $st, $key, $value) ;
f6b705ef 1281
1282 sub match
1283 {
1284 my $key = shift ;
610ab055 1285 my $value = 0;
f6b705ef 1286 my $orig_key = $key ;
1287 $x->seq($key, $value, R_CURSOR) ;
1288 print "$orig_key\t-> $key\t-> $value\n" ;
1289 }
1290
1291 $filename = "tree" ;
1292 unlink $filename ;
1293
45a340cb 1294 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
f6b705ef 1295 or die "Cannot open $filename: $!\n";
bbc7dcd2 1296
f6b705ef 1297 # Add some key/value pairs to the file
1298 $h{'mouse'} = 'mickey' ;
1299 $h{'Wall'} = 'Larry' ;
1300 $h{'Walls'} = 'Brick' ;
1301 $h{'Smith'} = 'John' ;
bbc7dcd2 1302
f6b705ef 1303
610ab055 1304 $key = $value = 0 ;
f6b705ef 1305 print "IN ORDER\n" ;
1306 for ($st = $x->seq($key, $value, R_FIRST) ;
1307 $st == 0 ;
1308 $st = $x->seq($key, $value, R_NEXT) )
bbc7dcd2 1309
2c2d71f5 1310 { print "$key -> $value\n" }
bbc7dcd2 1311
f6b705ef 1312 print "\nPARTIAL MATCH\n" ;
1313
1314 match "Wa" ;
1315 match "A" ;
1316 match "a" ;
1317
1318 undef $x ;
1319 untie %h ;
1320
1321Here is the output:
1322
1323 IN ORDER
1324 Smith -> John
1325 Wall -> Larry
1326 Walls -> Brick
1327 mouse -> mickey
1328
1329 PARTIAL MATCH
1330 Wa -> Wall -> Larry
1331 A -> Smith -> John
1332 a -> mouse -> mickey
1333
1334=head1 DB_RECNO
1335
1336DB_RECNO provides an interface to flat text files. Both variable and
1337fixed length records are supported.
3b35bae3 1338
6ca2e664 1339In order to make RECNO more compatible with Perl, the array offset for
88108326 1340all RECNO arrays begins at 0 rather than 1 as in Berkeley DB.
3b35bae3 1341
88108326 1342As with normal Perl arrays, a RECNO array can be accessed using
1343negative indexes. The index -1 refers to the last element of the array,
1344-2 the second last, and so on. Attempting to access an element before
1345the start of the array will raise a fatal run-time error.
3b35bae3 1346
68dc0745 1347=head2 The 'bval' Option
36477c24 1348
1349The operation of the bval option warrants some discussion. Here is the
1350definition of bval from the Berkeley DB 1.85 recno manual page:
1351
1352 The delimiting byte to be used to mark the end of a
1353 record for variable-length records, and the pad charac-
1354 ter for fixed-length records. If no value is speci-
1355 fied, newlines (``\n'') are used to mark the end of
1356 variable-length records and fixed-length records are
1357 padded with spaces.
1358
1359The second sentence is wrong. In actual fact bval will only default to
1360C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL
1361openinfo parameter is used at all, the value that happens to be in bval
1362will be used. That means you always have to specify bval when making
1363use of any of the options in the openinfo parameter. This documentation
1364error will be fixed in the next release of Berkeley DB.
1365
1366That clarifies the situation with regards Berkeley DB itself. What
1367about B<DB_File>? Well, the behavior defined in the quote above is
6ca2e664 1368quite useful, so B<DB_File> conforms to it.
36477c24 1369
1370That means that you can specify other options (e.g. cachesize) and
1371still have bval default to C<"\n"> for variable length records, and
1372space for fixed length records.
1373
c5da4faf 1374Also note that the bval option only allows you to specify a single byte
a6d6498e 1375as a delimiter.
c5da4faf 1376
f6b705ef 1377=head2 A Simple Example
3b35bae3 1378
6ca2e664
PM
1379Here is a simple example that uses RECNO (if you are using a version
1380of Perl earlier than 5.004_57 this example won't work -- see
1381L<Extra RECNO Methods> for a workaround).
f6b705ef 1382
3245f058 1383 use warnings ;
610ab055 1384 use strict ;
f6b705ef 1385 use DB_File ;
f6b705ef 1386
2c2d71f5
JH
1387 my $filename = "text" ;
1388 unlink $filename ;
1389
610ab055 1390 my @h ;
45a340cb 1391 tie @h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_RECNO
f6b705ef 1392 or die "Cannot open file 'text': $!\n" ;
1393
1394 # Add a few key/value pairs to the file
1395 $h[0] = "orange" ;
1396 $h[1] = "blue" ;
1397 $h[2] = "yellow" ;
1398
6ca2e664
PM
1399 push @h, "green", "black" ;
1400
1401 my $elements = scalar @h ;
1402 print "The array contains $elements entries\n" ;
1403
1404 my $last = pop @h ;
1405 print "popped $last\n" ;
1406
1407 unshift @h, "white" ;
1408 my $first = shift @h ;
1409 print "shifted $first\n" ;
1410
f6b705ef 1411 # Check for existence of a key
1412 print "Element 1 Exists with value $h[1]\n" if $h[1] ;
1413
1414 # use a negative index
1415 print "The last element is $h[-1]\n" ;
1416 print "The 2nd last element is $h[-2]\n" ;
1417
1418 untie @h ;
3b35bae3 1419
f6b705ef 1420Here is the output from the script:
1421
6ca2e664
PM
1422 The array contains 5 entries
1423 popped black
2c2d71f5 1424 shifted white
f6b705ef 1425 Element 1 Exists with value blue
6ca2e664
PM
1426 The last element is green
1427 The 2nd last element is yellow
f6b705ef 1428
6ca2e664 1429=head2 Extra RECNO Methods
f6b705ef 1430
045291aa 1431If you are using a version of Perl earlier than 5.004_57, the tied
6ca2e664
PM
1432array interface is quite limited. In the example script above
1433C<push>, C<pop>, C<shift>, C<unshift>
1434or determining the array length will not work with a tied array.
045291aa
PM
1435
1436To make the interface more useful for older versions of Perl, a number
1437of methods are supplied with B<DB_File> to simulate the missing array
1438operations. All these methods are accessed via the object returned from
1439the tie call.
f6b705ef 1440
1441Here are the methods:
1442
1443=over 5
3b35bae3 1444
f6b705ef 1445=item B<$X-E<gt>push(list) ;>
1446
1447Pushes the elements of C<list> to the end of the array.
1448
1449=item B<$value = $X-E<gt>pop ;>
1450
1451Removes and returns the last element of the array.
1452
1453=item B<$X-E<gt>shift>
1454
1455Removes and returns the first element of the array.
1456
1457=item B<$X-E<gt>unshift(list) ;>
1458
1459Pushes the elements of C<list> to the start of the array.
1460
1461=item B<$X-E<gt>length>
1462
1463Returns the number of elements in the array.
1464
c5da4faf
PM
1465=item B<$X-E<gt>splice(offset, length, elements);>
1466
a6d05634 1467Returns a splice of the array.
c5da4faf 1468
f6b705ef 1469=back
1470
1471=head2 Another Example
1472
1473Here is a more complete example that makes use of some of the methods
1474described above. It also makes use of the API interface directly (see
1475L<THE API INTERFACE>).
1476
3245f058 1477 use warnings ;
f6b705ef 1478 use strict ;
962cee9f 1479 my (@h, $H, $file, $i) ;
f6b705ef 1480 use DB_File ;
1481 use Fcntl ;
bbc7dcd2 1482
f6b705ef 1483 $file = "text" ;
1484
1485 unlink $file ;
1486
45a340cb 1487 $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0666, $DB_RECNO
f6b705ef 1488 or die "Cannot open file $file: $!\n" ;
bbc7dcd2 1489
f6b705ef 1490 # first create a text file to play with
1491 $h[0] = "zero" ;
1492 $h[1] = "one" ;
1493 $h[2] = "two" ;
1494 $h[3] = "three" ;
1495 $h[4] = "four" ;
1496
bbc7dcd2 1497
f6b705ef 1498 # Print the records in order.
1499 #
1500 # The length method is needed here because evaluating a tied
1501 # array in a scalar context does not return the number of
1502 # elements in the array.
1503
1504 print "\nORIGINAL\n" ;
1505 foreach $i (0 .. $H->length - 1) {
1506 print "$i: $h[$i]\n" ;
1507 }
1508
1509 # use the push & pop methods
1510 $a = $H->pop ;
1511 $H->push("last") ;
1512 print "\nThe last record was [$a]\n" ;
1513
1514 # and the shift & unshift methods
1515 $a = $H->shift ;
1516 $H->unshift("first") ;
1517 print "The first record was [$a]\n" ;
1518
1519 # Use the API to add a new record after record 2.
1520 $i = 2 ;
1521 $H->put($i, "Newbie", R_IAFTER) ;
1522
1523 # and a new record before record 1.
1524 $i = 1 ;
1525 $H->put($i, "New One", R_IBEFORE) ;
1526
1527 # delete record 3
1528 $H->del(3) ;
1529
1530 # now print the records in reverse order
1531 print "\nREVERSE\n" ;
1532 for ($i = $H->length - 1 ; $i >= 0 ; -- $i)
1533 { print "$i: $h[$i]\n" }
1534
1535 # same again, but use the API functions instead
1536 print "\nREVERSE again\n" ;
610ab055 1537 my ($s, $k, $v) = (0, 0, 0) ;
f6b705ef 1538 for ($s = $H->seq($k, $v, R_LAST) ;
1539 $s == 0 ;
1540 $s = $H->seq($k, $v, R_PREV))
1541 { print "$k: $v\n" }
1542
1543 undef $H ;
1544 untie @h ;
1545
1546and this is what it outputs:
1547
1548 ORIGINAL
1549 0: zero
1550 1: one
1551 2: two
1552 3: three
1553 4: four
1554
1555 The last record was [four]
1556 The first record was [zero]
1557
1558 REVERSE
1559 5: last
1560 4: three
1561 3: Newbie
1562 2: one
1563 1: New One
1564 0: first
1565
1566 REVERSE again
1567 5: last
1568 4: three
1569 3: Newbie
1570 2: one
1571 1: New One
1572 0: first
1573
1574Notes:
1575
1576=over 5
1577
1578=item 1.
1579
1580Rather than iterating through the array, C<@h> like this:
1581
1582 foreach $i (@h)
1583
1584it is necessary to use either this:
1585
1586 foreach $i (0 .. $H->length - 1)
1587
1588or this:
1589
1590 for ($a = $H->get($k, $v, R_FIRST) ;
1591 $a == 0 ;
1592 $a = $H->get($k, $v, R_NEXT) )
1593
1594=item 2.
1595
1596Notice that both times the C<put> method was used the record index was
1597specified using a variable, C<$i>, rather than the literal value
1598itself. This is because C<put> will return the record number of the
1599inserted line via that parameter.
1600
1601=back
1602
1603=head1 THE API INTERFACE
3b35bae3
AD
1604
1605As well as accessing Berkeley DB using a tied hash or array, it is also
88108326 1606possible to make direct use of most of the API functions defined in the
8e07c86e 1607Berkeley DB documentation.
3b35bae3 1608
88108326 1609To do this you need to store a copy of the object returned from the tie.
3b35bae3 1610
88108326 1611 $db = tie %hash, "DB_File", "filename" ;
3b35bae3 1612
8e07c86e 1613Once you have done that, you can access the Berkeley DB API functions
88108326 1614as B<DB_File> methods directly like this:
3b35bae3
AD
1615
1616 $db->put($key, $value, R_NOOVERWRITE) ;
1617
88108326 1618B<Important:> If you have saved a copy of the object returned from
1619C<tie>, the underlying database file will I<not> be closed until both
1620the tied variable is untied and all copies of the saved object are
610ab055 1621destroyed.
88108326 1622
1623 use DB_File ;
1624 $db = tie %hash, "DB_File", "filename"
1625 or die "Cannot tie filename: $!" ;
1626 ...
1627 undef $db ;
1628 untie %hash ;
1629
9a2c4ce3 1630See L<The untie() Gotcha> for more details.
778183f3 1631
88108326 1632All the functions defined in L<dbopen> are available except for
1633close() and dbopen() itself. The B<DB_File> method interface to the
1634supported functions have been implemented to mirror the way Berkeley DB
1635works whenever possible. In particular note that:
1636
1637=over 5
1638
1639=item *
1640
1641The methods return a status value. All return 0 on success.
1642All return -1 to signify an error and set C<$!> to the exact
1643error code. The return code 1 generally (but not always) means that the
1644key specified did not exist in the database.
1645
1646Other return codes are defined. See below and in the Berkeley DB
1647documentation for details. The Berkeley DB documentation should be used
1648as the definitive source.
1649
1650=item *
3b35bae3 1651
88108326 1652Whenever a Berkeley DB function returns data via one of its parameters,
1653the equivalent B<DB_File> method does exactly the same.
3b35bae3 1654
88108326 1655=item *
1656
1657If you are careful, it is possible to mix API calls with the tied
1658hash/array interface in the same piece of code. Although only a few of
1659the methods used to implement the tied interface currently make use of
1660the cursor, you should always assume that the cursor has been changed
1661any time the tied hash/array interface is used. As an example, this
1662code will probably not do what you expect:
1663
1664 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1665 or die "Cannot tie $filename: $!" ;
1666
1667 # Get the first key/value pair and set the cursor
1668 $X->seq($key, $value, R_FIRST) ;
1669
1670 # this line will modify the cursor
1671 $count = scalar keys %x ;
1672
1673 # Get the second key/value pair.
1674 # oops, it didn't, it got the last key/value pair!
1675 $X->seq($key, $value, R_NEXT) ;
1676
1677The code above can be rearranged to get around the problem, like this:
1678
1679 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1680 or die "Cannot tie $filename: $!" ;
1681
1682 # this line will modify the cursor
1683 $count = scalar keys %x ;
1684
1685 # Get the first key/value pair and set the cursor
1686 $X->seq($key, $value, R_FIRST) ;
1687
1688 # Get the second key/value pair.
1689 # worked this time.
1690 $X->seq($key, $value, R_NEXT) ;
1691
1692=back
1693
1694All the constants defined in L<dbopen> for use in the flags parameters
1695in the methods defined below are also available. Refer to the Berkeley
1696DB documentation for the precise meaning of the flags values.
1697
1698Below is a list of the methods available.
3b35bae3
AD
1699
1700=over 5
1701
f6b705ef 1702=item B<$status = $X-E<gt>get($key, $value [, $flags]) ;>
88108326 1703
1704Given a key (C<$key>) this method reads the value associated with it
1705from the database. The value read from the database is returned in the
1706C<$value> parameter.
3b35bae3 1707
88108326 1708If the key does not exist the method returns 1.
3b35bae3 1709
88108326 1710No flags are currently defined for this method.
3b35bae3 1711
f6b705ef 1712=item B<$status = $X-E<gt>put($key, $value [, $flags]) ;>
3b35bae3 1713
88108326 1714Stores the key/value pair in the database.
1715
1716If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter
8e07c86e 1717will have the record number of the inserted key/value pair set.
3b35bae3 1718
88108326 1719Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and
1720R_SETCURSOR.
1721
f6b705ef 1722=item B<$status = $X-E<gt>del($key [, $flags]) ;>
3b35bae3 1723
88108326 1724Removes all key/value pairs with key C<$key> from the database.
3b35bae3 1725
88108326 1726A return code of 1 means that the requested key was not in the
1727database.
3b35bae3 1728
88108326 1729R_CURSOR is the only valid flag at present.
3b35bae3 1730
f6b705ef 1731=item B<$status = $X-E<gt>fd ;>
3b35bae3 1732
88108326 1733Returns the file descriptor for the underlying database.
3b35bae3 1734
b90e71be
GS
1735See L<Locking: The Trouble with fd> for an explanation for why you should
1736not use C<fd> to lock your database.
3b35bae3 1737
f6b705ef 1738=item B<$status = $X-E<gt>seq($key, $value, $flags) ;>
3b35bae3 1739
88108326 1740This interface allows sequential retrieval from the database. See
1741L<dbopen> for full details.
1742
1743Both the C<$key> and C<$value> parameters will be set to the key/value
1744pair read from the database.
1745
1746The flags parameter is mandatory. The valid flag values are R_CURSOR,
1747R_FIRST, R_LAST, R_NEXT and R_PREV.
1748
f6b705ef 1749=item B<$status = $X-E<gt>sync([$flags]) ;>
88108326 1750
1751Flushes any cached buffers to disk.
1752
1753R_RECNOSYNC is the only valid flag at present.
3b35bae3
AD
1754
1755=back
1756
cad2e5aa
JH
1757=head1 DBM FILTERS
1758
1759A DBM Filter is a piece of code that is be used when you I<always>
1760want to make the same transformation to all keys and/or values in a
1761DBM database.
1762
1763There are four methods associated with DBM Filters. All work identically,
1764and each is used to install (or uninstall) a single DBM Filter. Each
1765expects a single parameter, namely a reference to a sub. The only
1766difference between them is the place that the filter is installed.
1767
1768To summarise:
1769
1770=over 5
1771
1772=item B<filter_store_key>
1773
1774If a filter has been installed with this method, it will be invoked
1775every time you write a key to a DBM database.
1776
1777=item B<filter_store_value>
1778
1779If a filter has been installed with this method, it will be invoked
1780every time you write a value to a DBM database.
1781
1782
1783=item B<filter_fetch_key>
1784
1785If a filter has been installed with this method, it will be invoked
1786every time you read a key from a DBM database.
1787
1788=item B<filter_fetch_value>
1789
1790If a filter has been installed with this method, it will be invoked
1791every time you read a value from a DBM database.
1792
1793=back
1794
1795You can use any combination of the methods, from none, to all four.
1796
1797All filter methods return the existing filter, if present, or C<undef>
1798in not.
1799
1800To delete a filter pass C<undef> to it.
1801
1802=head2 The Filter
1803
1804When each filter is called by Perl, a local copy of C<$_> will contain
1805the key or value to be filtered. Filtering is achieved by modifying
1806the contents of C<$_>. The return code from the filter is ignored.
1807
1808=head2 An Example -- the NULL termination problem.
1809
1810Consider the following scenario. You have a DBM database
1811that you need to share with a third-party C application. The C application
1812assumes that I<all> keys and values are NULL terminated. Unfortunately
1813when Perl writes to DBM databases it doesn't use NULL termination, so
1814your Perl application will have to manage NULL termination itself. When
1815you write to the database you will have to use something like this:
1816
1817 $hash{"$key\0"} = "$value\0" ;
1818
1819Similarly the NULL needs to be taken into account when you are considering
1820the length of existing keys/values.
1821
1822It would be much better if you could ignore the NULL terminations issue
1823in the main application code and have a mechanism that automatically
1824added the terminating NULL to all keys and values whenever you write to
1825the database and have them removed when you read from the database. As I'm
1826sure you have already guessed, this is a problem that DBM Filters can
1827fix very easily.
1828
3245f058 1829 use warnings ;
cad2e5aa
JH
1830 use strict ;
1831 use DB_File ;
1832
1833 my %hash ;
2359510d 1834 my $filename = "filt" ;
cad2e5aa
JH
1835 unlink $filename ;
1836
1837 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1838 or die "Cannot open $filename: $!\n" ;
1839
1840 # Install DBM Filters
1841 $db->filter_fetch_key ( sub { s/\0$// } ) ;
1842 $db->filter_store_key ( sub { $_ .= "\0" } ) ;
1843 $db->filter_fetch_value( sub { s/\0$// } ) ;
1844 $db->filter_store_value( sub { $_ .= "\0" } ) ;
1845
1846 $hash{"abc"} = "def" ;
1847 my $a = $hash{"ABC"} ;
1848 # ...
1849 undef $db ;
1850 untie %hash ;
1851
1852Hopefully the contents of each of the filters should be
1853self-explanatory. Both "fetch" filters remove the terminating NULL,
1854and both "store" filters add a terminating NULL.
1855
1856
1857=head2 Another Example -- Key is a C int.
1858
1859Here is another real-life example. By default, whenever Perl writes to
1860a DBM database it always writes the key and value as strings. So when
1861you use this:
1862
3c4b39be 1863 $hash{12345} = "something" ;
cad2e5aa
JH
1864
1865the key 12345 will get stored in the DBM database as the 5 byte string
1866"12345". If you actually want the key to be stored in the DBM database
1867as a C int, you will have to use C<pack> when writing, and C<unpack>
1868when reading.
1869
1870Here is a DBM Filter that does it:
1871
3245f058 1872 use warnings ;
cad2e5aa
JH
1873 use strict ;
1874 use DB_File ;
1875 my %hash ;
2359510d 1876 my $filename = "filt" ;
cad2e5aa
JH
1877 unlink $filename ;
1878
1879
1880 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1881 or die "Cannot open $filename: $!\n" ;
1882
1883 $db->filter_fetch_key ( sub { $_ = unpack("i", $_) } ) ;
1884 $db->filter_store_key ( sub { $_ = pack ("i", $_) } ) ;
1885 $hash{123} = "def" ;
1886 # ...
1887 undef $db ;
1888 untie %hash ;
1889
1890This time only two filters have been used -- we only need to manipulate
1891the contents of the key, so it wasn't necessary to install any value
1892filters.
1893
f6b705ef 1894=head1 HINTS AND TIPS
3b35bae3 1895
3b35bae3 1896
b90e71be 1897=head2 Locking: The Trouble with fd
3b35bae3 1898
b90e71be
GS
1899Until version 1.72 of this module, the recommended technique for locking
1900B<DB_File> databases was to flock the filehandle returned from the "fd"
1901function. Unfortunately this technique has been shown to be fundamentally
1902flawed (Kudos to David Harris for tracking this down). Use it at your own
1903peril!
3b35bae3 1904
b90e71be 1905The locking technique went like this.
cb1a09d0 1906
2359510d
SD
1907 $db = tie(%db, 'DB_File', 'foo.db', O_CREAT|O_RDWR, 0644)
1908 || die "dbcreat foo.db $!";
b90e71be
GS
1909 $fd = $db->fd;
1910 open(DB_FH, "+<&=$fd") || die "dup $!";
1911 flock (DB_FH, LOCK_EX) || die "flock: $!";
1912 ...
1913 $db{"Tom"} = "Jerry" ;
1914 ...
1915 flock(DB_FH, LOCK_UN);
1916 undef $db;
1917 untie %db;
1918 close(DB_FH);
cb1a09d0 1919
b90e71be 1920In simple terms, this is what happens:
cb1a09d0 1921
b90e71be 1922=over 5
cb1a09d0 1923
b90e71be 1924=item 1.
cb1a09d0 1925
b90e71be 1926Use "tie" to open the database.
cb1a09d0 1927
b90e71be 1928=item 2.
cb1a09d0 1929
b90e71be 1930Lock the database with fd & flock.
cb1a09d0 1931
b90e71be 1932=item 3.
cb1a09d0 1933
b90e71be 1934Read & Write to the database.
cb1a09d0 1935
b90e71be 1936=item 4.
cb1a09d0 1937
b90e71be 1938Unlock and close the database.
cb1a09d0 1939
b90e71be
GS
1940=back
1941
1942Here is the crux of the problem. A side-effect of opening the B<DB_File>
1943database in step 2 is that an initial block from the database will get
1944read from disk and cached in memory.
1945
1946To see why this is a problem, consider what can happen when two processes,
1947say "A" and "B", both want to update the same B<DB_File> database
1948using the locking steps outlined above. Assume process "A" has already
1949opened the database and has a write lock, but it hasn't actually updated
1950the database yet (it has finished step 2, but not started step 3 yet). Now
1951process "B" tries to open the same database - step 1 will succeed,
1952but it will block on step 2 until process "A" releases the lock. The
1953important thing to notice here is that at this point in time both
1954processes will have cached identical initial blocks from the database.
1955
1956Now process "A" updates the database and happens to change some of the
1957data held in the initial buffer. Process "A" terminates, flushing
1958all cached data to disk and releasing the database lock. At this point
1959the database on disk will correctly reflect the changes made by process
1960"A".
1961
1962With the lock released, process "B" can now continue. It also updates the
1963database and unfortunately it too modifies the data that was in its
1964initial buffer. Once that data gets flushed to disk it will overwrite
1965some/all of the changes process "A" made to the database.
1966
1967The result of this scenario is at best a database that doesn't contain
1968what you expect. At worst the database will corrupt.
1969
1970The above won't happen every time competing process update the same
1971B<DB_File> database, but it does illustrate why the technique should
1972not be used.
1973
1974=head2 Safe ways to lock a database
1975
1976Starting with version 2.x, Berkeley DB has internal support for locking.
1977The companion module to this one, B<BerkeleyDB>, provides an interface
1978to this locking functionality. If you are serious about locking
1979Berkeley DB databases, I strongly recommend using B<BerkeleyDB>.
1980
1981If using B<BerkeleyDB> isn't an option, there are a number of modules
1982available on CPAN that can be used to implement locking. Each one
1983implements locking differently and has different goals in mind. It is
1984therefore worth knowing the difference, so that you can pick the right
1985one for your application. Here are the three locking wrappers:
1986
1987=over 5
1988
1989=item B<Tie::DB_Lock>
1990
1991A B<DB_File> wrapper which creates copies of the database file for
1992read access, so that you have a kind of a multiversioning concurrent read
1993system. However, updates are still serial. Use for databases where reads
1994may be lengthy and consistency problems may occur.
1995
1996=item B<Tie::DB_LockFile>
1997
1998A B<DB_File> wrapper that has the ability to lock and unlock the database
1999while it is being used. Avoids the tie-before-flock problem by simply
2000re-tie-ing the database when you get or drop a lock. Because of the
2001flexibility in dropping and re-acquiring the lock in the middle of a
2002session, this can be massaged into a system that will work with long
2003updates and/or reads if the application follows the hints in the POD
2004documentation.
2005
2006=item B<DB_File::Lock>
2007
2008An extremely lightweight B<DB_File> wrapper that simply flocks a lockfile
2009before tie-ing the database and drops the lock after the untie. Allows
2010one to use the same lockfile for multiple databases to avoid deadlock
2011problems, if desired. Use for databases where updates are reads are
2012quick and simple flock locking semantics are enough.
2013
2014=back
cb1a09d0 2015
68dc0745 2016=head2 Sharing Databases With C Applications
f6b705ef 2017
2018There is no technical reason why a Berkeley DB database cannot be
2019shared by both a Perl and a C application.
2020
2021The vast majority of problems that are reported in this area boil down
2022to the fact that C strings are NULL terminated, whilst Perl strings are
cad2e5aa 2023not. See L<DBM FILTERS> for a generic way to work around this problem.
f6b705ef 2024
2025Here is a real example. Netscape 2.0 keeps a record of the locations you
2026visit along with the time you last visited them in a DB_HASH database.
2027This is usually stored in the file F<~/.netscape/history.db>. The key
2028field in the database is the location string and the value field is the
2029time the location was last visited stored as a 4 byte binary value.
2030
2031If you haven't already guessed, the location string is stored with a
2032terminating NULL. This means you need to be careful when accessing the
2033database.
2034
2035Here is a snippet of code that is loosely based on Tom Christiansen's
2036I<ggh> script (available from your nearest CPAN archive in
2037F<authors/id/TOMC/scripts/nshist.gz>).
2038
3245f058 2039 use warnings ;
610ab055 2040 use strict ;
f6b705ef 2041 use DB_File ;
2042 use Fcntl ;
f6b705ef 2043
962cee9f 2044 my ($dotdir, $HISTORY, %hist_db, $href, $binary_time, $date) ;
f6b705ef 2045 $dotdir = $ENV{HOME} || $ENV{LOGNAME};
2046
2047 $HISTORY = "$dotdir/.netscape/history.db";
2048
2049 tie %hist_db, 'DB_File', $HISTORY
2050 or die "Cannot open $HISTORY: $!\n" ;;
2051
2052 # Dump the complete database
2053 while ( ($href, $binary_time) = each %hist_db ) {
2054
2055 # remove the terminating NULL
2056 $href =~ s/\x00$// ;
2057
2058 # convert the binary time into a user friendly string
2059 $date = localtime unpack("V", $binary_time);
2060 print "$date $href\n" ;
2061 }
2062
2063 # check for the existence of a specific key
2064 # remember to add the NULL
2065 if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) {
2066 $date = localtime unpack("V", $binary_time) ;
2067 print "Last visited mox.perl.com on $date\n" ;
2068 }
2069 else {
2070 print "Never visited mox.perl.com\n"
2071 }
2072
2073 untie %hist_db ;
2074
68dc0745 2075=head2 The untie() Gotcha
778183f3 2076
7a2e2cd6 2077If you make use of the Berkeley DB API, it is I<very> strongly
68dc0745 2078recommended that you read L<perltie/The untie Gotcha>.
778183f3
PM
2079
2080Even if you don't currently make use of the API interface, it is still
2081worth reading it.
2082
2083Here is an example which illustrates the problem from a B<DB_File>
2084perspective:
2085
2086 use DB_File ;
2087 use Fcntl ;
2088
2089 my %x ;
2090 my $X ;
2091
2092 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC
2093 or die "Cannot tie first time: $!" ;
2094
2095 $x{123} = 456 ;
2096
2097 untie %x ;
2098
2099 tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
2100 or die "Cannot tie second time: $!" ;
2101
2102 untie %x ;
2103
2104When run, the script will produce this error message:
2105
2106 Cannot tie second time: Invalid argument at bad.file line 14.
2107
2108Although the error message above refers to the second tie() statement
2109in the script, the source of the problem is really with the untie()
2110statement that precedes it.
2111
2112Having read L<perltie> you will probably have already guessed that the
2113error is caused by the extra copy of the tied object stored in C<$X>.
2114If you haven't, then the problem boils down to the fact that the
2115B<DB_File> destructor, DESTROY, will not be called until I<all>
2116references to the tied object are destroyed. Both the tied variable,
2117C<%x>, and C<$X> above hold a reference to the object. The call to
2118untie() will destroy the first, but C<$X> still holds a valid
2119reference, so the destructor will not get called and the database file
2120F<tst.fil> will remain open. The fact that Berkeley DB then reports the
b90e71be 2121attempt to open a database that is already open via the catch-all
778183f3
PM
2122"Invalid argument" doesn't help.
2123
2124If you run the script with the C<-w> flag the error message becomes:
2125
2126 untie attempted while 1 inner references still exist at bad.file line 12.
2127 Cannot tie second time: Invalid argument at bad.file line 14.
2128
2129which pinpoints the real problem. Finally the script can now be
2130modified to fix the original problem by destroying the API object
2131before the untie:
2132
2133 ...
2134 $x{123} = 456 ;
2135
2136 undef $X ;
2137 untie %x ;
2138
2139 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
2140 ...
2141
f6b705ef 2142
2143=head1 COMMON QUESTIONS
2144
2145=head2 Why is there Perl source in my database?
2146
2147If you look at the contents of a database file created by DB_File,
2148there can sometimes be part of a Perl script included in it.
2149
2150This happens because Berkeley DB uses dynamic memory to allocate
2151buffers which will subsequently be written to the database file. Being
2152dynamic, the memory could have been used for anything before DB
2153malloced it. As Berkeley DB doesn't clear the memory once it has been
2154allocated, the unused portions will contain random junk. In the case
2155where a Perl script gets written to the database, the random junk will
2156correspond to an area of dynamic memory that happened to be used during
2157the compilation of the script.
2158
2159Unless you don't like the possibility of there being part of your Perl
2160scripts embedded in a database file, this is nothing to worry about.
2161
2162=head2 How do I store complex data structures with DB_File?
2163
2164Although B<DB_File> cannot do this directly, there is a module which
2165can layer transparently over B<DB_File> to accomplish this feat.
2166
2167Check out the MLDBM module, available on CPAN in the directory
2168F<modules/by-module/MLDBM>.
2169
2170=head2 What does "Invalid Argument" mean?
2171
2172You will get this error message when one of the parameters in the
2173C<tie> call is wrong. Unfortunately there are quite a few parameters to
2174get wrong, so it can be difficult to figure out which one it is.
2175
2176Here are a couple of possibilities:
2177
2178=over 5
2179
2180=item 1.
2181
610ab055 2182Attempting to reopen a database without closing it.
f6b705ef 2183
2184=item 2.
2185
2186Using the O_WRONLY flag.
2187
2188=back
2189
2190=head2 What does "Bareword 'DB_File' not allowed" mean?
2191
2192You will encounter this particular error message when you have the
2193C<strict 'subs'> pragma (or the full strict pragma) in your script.
2194Consider this script:
2195
3245f058 2196 use warnings ;
f6b705ef 2197 use strict ;
2198 use DB_File ;
07200f1b 2199 my %x ;
f6b705ef 2200 tie %x, DB_File, "filename" ;
2201
2202Running it produces the error in question:
2203
2204 Bareword "DB_File" not allowed while "strict subs" in use
2205
2206To get around the error, place the word C<DB_File> in either single or
2207double quotes, like this:
2208
2209 tie %x, "DB_File", "filename" ;
2210
2211Although it might seem like a real pain, it is really worth the effort
2212of having a C<use strict> in all your scripts.
2213
cad2e5aa
JH
2214=head1 REFERENCES
2215
2216Articles that are either about B<DB_File> or make use of it.
2217
2218=over 5
2219
2220=item 1.
2221
2222I<Full-Text Searching in Perl>, Tim Kientzle (tkientzle@ddj.com),
2223Dr. Dobb's Journal, Issue 295, January 1999, pp 34-41
2224
2225=back
2226
cb1a09d0
AD
2227=head1 HISTORY
2228
1f70e1ea 2229Moved to the Changes file.
610ab055 2230
1f70e1ea 2231=head1 BUGS
05475680 2232
1f70e1ea
PM
2233Some older versions of Berkeley DB had problems with fixed length
2234records using the RECNO file format. This problem has been fixed since
2235version 1.85 of Berkeley DB.
e858de61 2236
1f70e1ea
PM
2237I am sure there are bugs in the code. If you do find any, or can
2238suggest any enhancements, I would welcome your comments.
a6ed719b 2239
1f70e1ea 2240=head1 AVAILABILITY
a6ed719b 2241
1f70e1ea
PM
2242B<DB_File> comes with the standard Perl source distribution. Look in
2243the directory F<ext/DB_File>. Given the amount of time between releases
2244of Perl the version that ships with Perl is quite likely to be out of
2245date, so the most recent version can always be found on CPAN (see
5bbd4290 2246L<perlmodlib/CPAN> for details), in the directory
1f70e1ea 2247F<modules/by-module/DB_File>.
a6ed719b 2248
039d031f
PM
2249This version of B<DB_File> will work with either version 1.x, 2.x or
22503.x of Berkeley DB, but is limited to the functionality provided by
2251version 1.
a6ed719b 2252
10261a36 2253The official web site for Berkeley DB is F<http://www.oracle.com/technology/products/berkeley-db/db/index.html>.
039d031f 2254All versions of Berkeley DB are available there.
93af7a87 2255
1f70e1ea
PM
2256Alternatively, Berkeley DB version 1 is available at your nearest CPAN
2257archive in F<src/misc/db.1.85.tar.gz>.
e858de61 2258
1f70e1ea
PM
2259If you are running IRIX, then get Berkeley DB version 1 from
2260F<http://reality.sgi.com/ariel>. It has the patches necessary to
2261compile properly on IRIX 5.3.
610ab055 2262
1f70e1ea 2263=head1 COPYRIGHT
3b35bae3 2264
10261a36 2265Copyright (c) 1995-2007 Paul Marquess. All rights reserved. This program
a9fd575d
PM
2266is free software; you can redistribute it and/or modify it under the
2267same terms as Perl itself.
3b35bae3 2268
1f70e1ea
PM
2269Although B<DB_File> is covered by the Perl license, the library it
2270makes use of, namely Berkeley DB, is not. Berkeley DB has its own
2271copyright and its own license. Please take the time to read it.
3b35bae3 2272
a9fd575d 2273Here are are few words taken from the Berkeley DB FAQ (at
10261a36 2274F<http://www.oracle.com/technology/products/berkeley-db/db/index.html>) regarding the license:
68dc0745 2275
a9fd575d 2276 Do I have to license DB to use it in Perl scripts?
3b35bae3 2277
a9fd575d
PM
2278 No. The Berkeley DB license requires that software that uses
2279 Berkeley DB be freely redistributable. In the case of Perl, that
2280 software is Perl, and not your scripts. Any Perl scripts that you
2281 write are your property, including scripts that make use of
2282 Berkeley DB. Neither the Perl license nor the Berkeley DB license
2283 place any restriction on what you may do with them.
88108326 2284
1f70e1ea
PM
2285If you are in any doubt about the license situation, contact either the
2286Berkeley DB authors or the author of DB_File. See L<"AUTHOR"> for details.
a0b8c8c1
PM
2287
2288
3b35bae3
AD
2289=head1 SEE ALSO
2290
5bbd4290
PM
2291L<perl>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>,
2292L<perldbmfilter>
3b35bae3 2293
3b35bae3
AD
2294=head1 AUTHOR
2295
8e07c86e 2296The DB_File interface was written by Paul Marquess
5bbd4290 2297E<lt>pmqs@cpan.orgE<gt>.
3b35bae3
AD
2298
2299=cut