This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Change 31798 forgot to add the op 'once' to Opcode.pm
[perl5.git] / ext / DB_File / DB_File.pm
CommitLineData
a0d0e21e
LW
1# DB_File.pm -- Perl 5 interface to Berkeley DB
2#
6d02d21f 3# written by Paul Marquess (pmqs@cpan.org)
f1aa208b
RGS
4# last modified 4th February 2007
5# version 1.815
36477c24 6#
f1aa208b 7# Copyright (c) 1995-2007 Paul Marquess. All rights reserved.
36477c24 8# This program is free software; you can redistribute it and/or
9# modify it under the same terms as Perl itself.
10
8e07c86e
AD
11
12package DB_File::HASHINFO ;
785da04d 13
e5021521 14require 5.00404;
610ab055 15
3245f058 16use warnings;
785da04d 17use strict;
8e07c86e 18use Carp;
88108326 19require Tie::Hash;
20@DB_File::HASHINFO::ISA = qw(Tie::Hash);
8e07c86e 21
sub new
{
    # Constructor: hand back a reference to a hash that is tied to the
    # invoking class and also blessed into it, so callers can use it
    # either as an object or as a plain (validated) hash reference.
    my ($class) = @_ ;

    my %info ;
    tie %info, $class ;

    return bless \%info, $class ;
}
29
610ab055 30
sub TIEHASH
{
    # Build the object behind a tied HASHINFO hash.
    #   VALID - the option names that may be used, each mapped to a type
    #           code (STORE insists on a code reference for type 2).
    #   GOT   - the option values that have actually been stored.
    my ($class) = @_ ;

    my %valid = map { $_ => 1 } qw( bsize ffactor nelem cachesize lorder ) ;
    $valid{hash} = 2 ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
8e07c86e 46
610ab055 47
8e07c86e
AD
sub FETCH
{
    # Return the value stored for option $key (undef when nothing has been
    # stored yet). Croaks when $key is not one of the names listed in VALID.
    my ($self, $key) = @_ ;

    unless ( exists $self->{VALID}{$key} )
    {
        my $class = ref $self ;
        croak "${class}::FETCH - Unknown element '$key'" ;
    }

    return $self->{GOT}{$key} ;
}
58
59
sub STORE
{
    # Record $value for option $key.
    #
    # VALID maps each permitted option name to a type code; an option of
    # type 2 must be given a code reference. Croaks when $key is not a
    # known option, or when a type 2 option is given anything that is not
    # a code reference.
    my $self  = shift ;
    my $key   = shift ;
    my $value = shift ;

    my $type = $self->{VALID}{$key};

    if ( $type )
    {
        # The previous test (!ref $value && ref $value ne 'CODE') only
        # rejected non-references, so e.g. an array reference slipped
        # through. UNIVERSAL::isa is used so that a blessed code
        # reference is still accepted.
        croak "Key '$key' not associated with a code reference"
            if $type == 2
               && ! ( ref $value && UNIVERSAL::isa($value, 'CODE') ) ;

        $self->{GOT}{$key} = $value ;
        return ;
    }

    my $pkg = ref $self ;
    croak "${pkg}::STORE - Unknown element '$key'" ;
}
79
sub DELETE
{
    # Forget any value stored for option $key. Croaks when $key is not one
    # of the names listed in VALID. Always returns nothing on success.
    my $self = shift ;
    my $key  = shift ;

    if ( exists $self->{VALID}{$key} )
    {
        delete $self->{GOT}{$key} ;
        return ;
    }

    # Report the object's real class, as FETCH and STORE do, so that the
    # classes inheriting this method are not misreported as HASHINFO.
    my $pkg = ref $self ;
    croak "${pkg}::DELETE - Unknown element '$key'" ;
}
94
sub EXISTS
{
    # True when $key is a permitted option name (i.e. it appears in
    # VALID), whether or not a value has been stored for it.
    my ($self, $key) = @_ ;

    return exists $self->{VALID}{$key} ;
}
102
sub NotHere
{
    # Croak with a message saying that the object's class does not
    # provide the tied-hash method named by $method.
    my ($self, $method) = @_ ;

    my $class = ref $self ;
    croak "$class does not define the method $method" ;
}
110
18d2dc8c
PM
# Iteration is not supported: delegate to NotHere, which croaks.
sub FIRSTKEY { $_[0]->NotHere("FIRSTKEY") }
# Iteration is not supported: delegate to NotHere, which croaks.
sub NEXTKEY  { $_[0]->NotHere("NEXTKEY") }
# Clearing the whole hash is not supported: delegate to NotHere, which croaks.
sub CLEAR    { $_[0]->NotHere("CLEAR") }
8e07c86e
AD
114
115package DB_File::RECNOINFO ;
785da04d 116
3245f058 117use warnings;
88108326 118use strict ;
119
045291aa 120@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e
AD
121
sub TIEHASH
{
    # Build the object behind a tied RECNOINFO hash: VALID lists the
    # permitted option names (all of type 1), GOT holds the values that
    # have been stored.
    my ($class) = @_ ;

    my @options = qw( bval cachesize psize flags lorder reclen bfname ) ;
    my %valid ;
    @valid{@options} = (1) x @options ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
132
88108326 133package DB_File::BTREEINFO ;
8e07c86e 134
3245f058 135use warnings;
88108326 136use strict ;
8e07c86e 137
88108326 138@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e 139
sub TIEHASH
{
    # Build the object behind a tied BTREEINFO hash. VALID lists the
    # permitted option names with their type code: 'compare' and 'prefix'
    # are type 2, everything else is type 1. GOT holds the values that
    # have been stored.
    my ($class) = @_ ;

    my %valid = map { $_ => 1 }
                qw( flags cachesize maxkeypage minkeypage psize lorder ) ;
    $valid{$_} = 2 for qw( compare prefix ) ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
157
158
8e07c86e 159package DB_File ;
785da04d 160
3245f058 161use warnings;
785da04d 162use strict;
07200f1b 163our ($VERSION, @ISA, @EXPORT, $AUTOLOAD, $DB_BTREE, $DB_HASH, $DB_RECNO);
c36891e2 164our ($db_version, $use_XSLoader, $splice_end_array, $Error);
8e07c86e
AD
165use Carp;
166
785da04d 167
f1aa208b 168$VERSION = "1.815" ;
d85a743d
PM
169
# Find out whether this perl warns when splice() is given an offset that
# lies beyond the end of the array. The warning text (if any) is captured
# in $splice_end_array, which is then reduced to a true/false flag that
# SPLICE consults before issuing the same warning itself.
{
    local $SIG{__WARN__} = sub { $splice_end_array = "@_" } ;

    my @probe = (1) ;
    splice(@probe, 3) ;

    $splice_end_array =
        ($splice_end_array =~ /^splice\(\) offset past end of array at /) ;
}
8e07c86e
AD
176
177#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
# The three pre-defined info objects exported to callers, one per file type.
$DB_BTREE = DB_File::BTREEINFO->new ;
$DB_HASH  = DB_File::HASHINFO->new ;
$DB_RECNO = DB_File::RECNOINFO->new ;
8e07c86e 181
785da04d 182require Tie::Hash;
8e07c86e
AD
183require Exporter;
184use AutoLoader;
b90e71be
GS
BEGIN {
    # Prefer XSLoader for loading the XS code. If it cannot be loaded,
    # fall back to DynaLoader, which has to be inherited from.
    {
        # Keep any caller-installed __DIE__ handler out of the probe.
        local $SIG{__DIE__} ;
        eval { require XSLoader } ;
    }

    $use_XSLoader = $@ ? 0 : 1 ;

    if (! $use_XSLoader) {
        require DynaLoader;
        @ISA = qw(DynaLoader);
    }
}
195
196push @ISA, qw(Tie::Hash Exporter);
8e07c86e
AD
197@EXPORT = qw(
198 $DB_BTREE $DB_HASH $DB_RECNO
88108326 199
8e07c86e
AD
200 BTREEMAGIC
201 BTREEVERSION
202 DB_LOCK
203 DB_SHMEM
204 DB_TXN
205 HASHMAGIC
206 HASHVERSION
207 MAX_PAGE_NUMBER
208 MAX_PAGE_OFFSET
209 MAX_REC_NUMBER
210 RET_ERROR
211 RET_SPECIAL
212 RET_SUCCESS
213 R_CURSOR
214 R_DUP
215 R_FIRST
216 R_FIXEDLEN
217 R_IAFTER
218 R_IBEFORE
219 R_LAST
220 R_NEXT
221 R_NOKEY
222 R_NOOVERWRITE
223 R_PREV
224 R_RECNOSYNC
225 R_SETCURSOR
226 R_SNAPSHOT
227 __R_UNUSED
88108326 228
045291aa 229);
8e07c86e
AD
230
sub AUTOLOAD {
    # Resolve an unknown sub name as a constant: look the unqualified
    # name up with constant(), croak with the error it reports, otherwise
    # install a sub returning the value under the requested name and
    # jump to it, so later calls do not come through AUTOLOAD again.
    my $constname = $AUTOLOAD ;
    $constname =~ s/.*:://;

    my ($error, $val) = constant($constname);
    Carp::croak($error) if $error;

    no strict 'refs';
    *{$AUTOLOAD} = sub { $val };
    goto &{$AUTOLOAD};
}
8e07c86e 240
f6b705ef 241
eval {
    # Make all Fcntl O_XXX constants available for importing.
    require Fcntl;
    my @open_flags = grep { /^O_/ } @Fcntl::EXPORT ;

    # Import them into this package first, then re-export them.
    Fcntl->import(@open_flags) ;
    push @EXPORT, @open_flags ;
};
f6b705ef 249
b90e71be
GS
# Load the XS half of the module with whichever loader the BEGIN block chose.
if (! $use_XSLoader)
  { DB_File->bootstrap($VERSION) }
else
  { XSLoader::load("DB_File", $VERSION) }
8e07c86e
AD
254
255# Preloaded methods go here. Autoload methods go after __END__, and are
256# processed by the autosplit program.
257
sub tie_hash_or_array
{
    # Common implementation of TIEHASH and TIEARRAY.
    #
    # The arguments are those given to tie(): the class, then optionally
    # the filename, the open flags, the file mode and an info object
    # ($DB_HASH, $DB_BTREE or $DB_RECNO, or another instance of those
    # classes). They are normalised and handed to DoTie_, whose result
    # is returned. Returns undef if a missing RECNO file cannot be
    # created.
    #
    # This sub must be called directly from TIEHASH/TIEARRAY: it looks
    # one frame up the call stack to find out which of them was used.
    my (@arg) = @_ ;
    my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ;

    use File::Spec;
    $arg[1] = File::Spec->rel2abs($arg[1])
        if defined $arg[1] ;

    # If the info argument is one of the tied hashes made by the *INFO
    # constructors, pass the object behind the tie instead.
    $arg[4] = tied %{ $arg[4] }
        if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ;

    # Flags and mode that were passed explicitly as undef get defaults.
    $arg[2] = O_CREAT()|O_RDWR() if @arg >=3 && ! defined $arg[2];
    $arg[3] = 0666               if @arg >=4 && ! defined $arg[3];

    # With Berkeley DB 4 or later, arrays are always opened with O_CREAT.
    if ($db_version >= 4 and ! $tieHASH) {
        $arg[2] |= O_CREAT();
    }

    # make recno in Berkeley DB version 2 (or better) work like
    # recno in version 1: create the file first if it does not exist.
    if ($db_version > 1 and defined $arg[4] and $arg[4] =~ /RECNO/ and
        $arg[1] and ! -e $arg[1]) {
        # Three-argument open with a lexical handle: the filename is never
        # interpreted as part of the mode, and no global FH is clobbered.
        open(my $fh, '>', $arg[1]) or return undef ;
        close $fh ;
        chmod $arg[3] ? $arg[3] : 0666 , $arg[1] ;
    }

    DoTie_($tieHASH, @arg) ;
}
288
05475680
PM
sub TIEHASH
{
    # tie() entry point for hashes. All the work is done by
    # tie_hash_or_array, which inspects the call stack to see that it was
    # reached via TIEHASH - so this must stay a real sub call.
    return tie_hash_or_array(@_) ;
}
293
sub TIEARRAY
{
    # tie() entry point for arrays. All the work is done by
    # tie_hash_or_array, which inspects the call stack to see that it was
    # not reached via TIEHASH - so this must stay a real sub call.
    return tie_hash_or_array(@_) ;
}
88108326 298
045291aa
PM
sub CLEAR
{
    # Empty the database: first walk it with seq() collecting every key,
    # then delete the collected keys in reverse order.
    my ($self) = @_ ;

    my ($key, $value) = (0, "") ;
    my @doomed ;

    for (my $status = $self->seq($key, $value, R_FIRST()) ;
         $status == 0 ;
         $status = $self->seq($key, $value, R_NEXT()) ) {
        push @doomed, $key ;
    }

    foreach my $victim (reverse @doomed) {
        # The status returned by del() is deliberately ignored.
        my $ignored = $self->del($victim) ;
    }
}
315
045291aa
PM
# Tied-array hook for pre-extending the array: nothing to do here.
sub EXTEND { return }
317
sub STORESIZE
{
    # Tied-array hook that sets the number of elements to $length.
    # Growing stores an empty string at the new last index; shrinking
    # deletes the surplus elements from the end backwards. Nothing is
    # done when the size is already $length.
    my ($self, $length) = @_ ;
    my $current_length = $self->length() ;

    if ($length > $current_length) {
        $self->put($length-1, "") ;
    }
    elsif ($length < $current_length) {
        my $key = $current_length ;
        $self->del($key) while -- $key >= $length ;
    }
}
333
c5da4faf
PM
334
# Private helper for SPLICE: build the die() message for a Berkeley DB call
# that returned the non-zero status $status. $call describes the call and
# $sep is the punctuation placed in front of "error status".
sub _splice_failure_message
{
    my ($call, $status, $sep) = @_ ;

    my $msg = "error from Berkeley DB on $call" ;
    if ($status == 1) {
        $msg .= ' (no such element?)' ;
    }
    else {
        $msg .= "$sep error status $status" ;
        $msg .= ", message $!" if defined $! and $! ne '' ;
    }

    return $msg ;
}

sub SPLICE
{
    # Tied-array implementation of splice(OFFSET, LENGTH, LIST).
    #
    # Removes LENGTH elements starting at OFFSET, inserts LIST in their
    # place and returns the removed elements (list context) or the last
    # removed element, stringified (scalar context; undef if nothing was
    # removed). Dies if any get/del/put call on the database fails.
    my $self = shift;
    my $offset = shift;
    if (not defined $offset) {
        warnings::warnif('uninitialized', 'Use of uninitialized value in splice');
        $offset = 0;
    }

    # NOTE(review): when LENGTH is omitted it is taken to be 0 here, so
    # nothing is removed. Perl's own splice() removes everything from
    # OFFSET onward in that case, and the original code carried an
    # unreachable block that tried to do the same - confirm which
    # behaviour is intended before changing it.
    my $length = @_ ? shift : 0;
    # Carping about definedness comes _after_ the OFFSET sanity check.
    # This is so we get the same error messages as Perl's splice().

    my @list = @_;

    my $size = $self->FETCHSIZE();

    # A negative OFFSET counts back from the end of the array.
    if ($offset < 0) {
        my $new_offset = $size + $offset;
        if ($new_offset < 0) {
            die "Modification of non-creatable array value attempted, "
              . "subscript $offset";
        }
        $offset = $new_offset;
    }

    if (not defined $length) {
        warnings::warnif('uninitialized', 'Use of uninitialized value in splice');
        $length = 0;
    }

    if ($offset > $size) {
        $offset = $size;
        warnings::warnif('misc', 'splice() offset past end of array')
            if $splice_end_array;
    }

    # A negative LENGTH leaves that many elements off the end of the
    # array. If that still comes out negative, behave as for LENGTH 0,
    # which is what perl's splice() does.
    if ($length < 0) {
        $length = $size - $offset + $length;
        $length = 0 if $length < 0;
    }

    # Never try to remove more than is there.
    $length = $size - $offset if $length > $size - $offset;

    # $num_elems holds the current number of elements in the database.
    my $num_elems = $size;

    # Remove the elements designated by OFFSET and LENGTH. Each deletion
    # moves the following element down to $offset, so the same index is
    # fetched and deleted every time round.
    my @removed = ();
    foreach (0 .. $length - 1) {
        my $old;
        my $status = $self->get($offset, $old);
        die _splice_failure_message("get($offset, \$old)", $status, ':')
            if $status != 0;
        push @removed, $old;

        $status = $self->del($offset);
        die _splice_failure_message("del($offset)", $status, ':')
            if $status != 0;

        -- $num_elems;
    }

    # ...and replace them with the elements of LIST, if any.
    # NOTE(review): the loop stops at the first undefined element of LIST,
    # so that element and everything after it are not inserted - confirm
    # this is intended.
    my $pos = $offset;
    while (defined (my $elem = shift @list)) {
        my $old_pos = $pos;
        my $status;
        if ($pos >= $num_elems) {
            # Writing at or past the end: a plain put is enough.
            $status = $self->put($pos, $elem);
        }
        else {
            $status = $self->put($pos, $elem, $self->R_IBEFORE);
        }

        die _splice_failure_message("put($pos, $elem, ...)", $status, ',')
            if $status != 0;

        die "pos unexpectedly changed from $old_pos to $pos with R_IBEFORE"
            if $old_pos != $pos;

        ++ $pos;
        ++ $num_elems;
    }

    # List context: the removed elements.
    return @removed if wantarray;

    # Void context: nothing to return.
    return unless defined wantarray;

    # Scalar context: the last element removed, or undef if none were.
    return undef unless @removed;
    my $last = pop @removed;
    return "$last";
}
# Lower-case alias so that SPLICE can be called as $X->splice(...).
sub ::DB_File::splice { return SPLICE(@_) }
500
6ca2e664
PM
sub find_dup
{
    # Usage: $status = $db->find_dup($key, $value)
    #
    # Starting from a cursor positioned with $key, step through the
    # records until one whose key is $key and whose value is $value is
    # found. Returns 0 when such a record is found, otherwise the
    # non-zero status of the seq() call that ended the scan.
    croak "Usage: \$db->find_dup(key,value)\n"
        unless @_ == 3 ;

    my ($db, $origkey, $value_wanted) = @_ ;

    # seq() updates $key and $value in place with the record it reaches.
    my $key   = $origkey ;
    my $value = 0 ;

    my $status = $db->seq($key, $value, R_CURSOR()) ;
    while ($status == 0) {
        return 0 if $key eq $origkey and $value eq $value_wanted ;
        $status = $db->seq($key, $value, R_NEXT()) ;
    }

    return $status ;
}
520
sub del_dup
{
    # Usage: $status = $db->del_dup($key, $value)
    #
    # Locate the record with this key and value using find_dup, then
    # delete the record at the cursor. Returns find_dup's status when it
    # is non-zero, otherwise the status of the del() call.
    croak "Usage: \$db->del_dup(key,value)\n"
        unless @_ == 3 ;

    my ($db, $key, $value) = @_ ;

    my ($found) = $db->find_dup($key, $value) ;
    return $found if $found != 0 ;

    my $deleted = $db->del($key, R_CURSOR() ) ;
    return $deleted ;
}
534
sub get_dup
{
    # Usage: $db->get_dup($key [, $flag])
    #
    # Collect the values of the consecutive records whose key is $key,
    # starting from a cursor positioned with $key.
    #   scalar context        - the number of such records
    #   list context          - the list of their values
    #   list context, $flag   - a hash mapping each distinct value to the
    #                           number of times it occurred
    croak "Usage: \$db->get_dup(key [,flag])\n"
        unless @_ == 2 or @_ == 3 ;

    my ($db, $key, $flag) = @_ ;
    my $want_list  = wantarray ;
    my $wanted_key = $key ;
    my $value      = 0 ;
    my $matches    = 0 ;
    my (%tally, @found) ;

    # Iterate through the database until either seq() reports a non-zero
    # status (e.g. end of data) or a different key is encountered.
    # seq() updates $key and $value in place.
    my $status = $db->seq($key, $value, R_CURSOR()) ;
    while ($status == 0 and $key eq $wanted_key) {

        # save the value or count the number of matches
        if (! $want_list) { ++ $matches }
        elsif ($flag)     { ++ $tally{$value} }
        else              { push @found, $value }

        $status = $db->seq($key, $value, R_NEXT()) ;
    }

    return $matches unless $want_list ;
    return $flag ? %tally : @found ;
}
571
572
8e07c86e
AD
5731;
574__END__
575
3b35bae3
AD
576=head1 NAME
577
1f70e1ea 578DB_File - Perl5 access to Berkeley DB version 1.x
3b35bae3
AD
579
580=head1 SYNOPSIS
581
bbc7dcd2
MS
582 use DB_File;
583
88108326 584 [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ;
585 [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ;
586 [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ;
760ac839 587
3b35bae3
AD
588 $status = $X->del($key [, $flags]) ;
589 $status = $X->put($key, $value [, $flags]) ;
590 $status = $X->get($key, $value [, $flags]) ;
760ac839 591 $status = $X->seq($key, $value, $flags) ;
3b35bae3
AD
592 $status = $X->sync([$flags]) ;
593 $status = $X->fd ;
760ac839 594
f6b705ef 595 # BTREE only
88108326 596 $count = $X->get_dup($key) ;
597 @list = $X->get_dup($key) ;
598 %list = $X->get_dup($key, 1) ;
6ca2e664
PM
599 $status = $X->find_dup($key, $value) ;
600 $status = $X->del_dup($key, $value) ;
88108326 601
f6b705ef 602 # RECNO only
603 $a = $X->length;
604 $a = $X->pop ;
605 $X->push(list);
606 $a = $X->shift;
607 $X->unshift(list);
c5da4faf 608 @r = $X->splice(offset, length, elements);
f6b705ef 609
cad2e5aa
JH
610 # DBM Filters
611 $old_filter = $db->filter_store_key ( sub { ... } ) ;
612 $old_filter = $db->filter_store_value( sub { ... } ) ;
613 $old_filter = $db->filter_fetch_key ( sub { ... } ) ;
614 $old_filter = $db->filter_fetch_value( sub { ... } ) ;
615
3b35bae3
AD
616 untie %hash ;
617 untie @array ;
618
619=head1 DESCRIPTION
620
8e07c86e 621B<DB_File> is a module which allows Perl programs to make use of the
1f70e1ea 622facilities provided by Berkeley DB version 1.x (if you have a newer
0d735f06 623version of DB, see L<Using DB_File with Berkeley DB version 2 or greater>).
039d031f
PM
624It is assumed that you have a copy of the Berkeley DB manual pages at
625hand when reading this documentation. The interface defined here
626mirrors the Berkeley DB interface closely.
68dc0745 627
8e07c86e
AD
628Berkeley DB is a C library which provides a consistent interface to a
629number of database formats. B<DB_File> provides an interface to all
630three of the database types currently supported by Berkeley DB.
3b35bae3
AD
631
632The file types are:
633
634=over 5
635
88108326 636=item B<DB_HASH>
3b35bae3 637
88108326 638This database type allows arbitrary key/value pairs to be stored in data
8e07c86e
AD
639files. This is equivalent to the functionality provided by other
640hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though,
641the files created using DB_HASH are not compatible with any of the
642other packages mentioned.
3b35bae3 643
8e07c86e
AD
644A default hashing algorithm, which will be adequate for most
645applications, is built into Berkeley DB. If you do need to use your own
646hashing algorithm it is possible to write your own in Perl and have
647B<DB_File> use it instead.
3b35bae3 648
88108326 649=item B<DB_BTREE>
650
651The btree format allows arbitrary key/value pairs to be stored in a
8e07c86e 652sorted, balanced binary tree.
3b35bae3 653
8e07c86e
AD
654As with the DB_HASH format, it is possible to provide a user defined
655Perl routine to perform the comparison of keys. By default, though, the
656keys are stored in lexical order.
3b35bae3 657
88108326 658=item B<DB_RECNO>
3b35bae3 659
8e07c86e
AD
660DB_RECNO allows both fixed-length and variable-length flat text files
661to be manipulated using the same key/value pair interface as in DB_HASH
662and DB_BTREE. In this case the key will consist of a record (line)
663number.
3b35bae3
AD
664
665=back
666
e5021521 667=head2 Using DB_File with Berkeley DB version 2 or greater
1f70e1ea
PM
668
669Although B<DB_File> is intended to be used with Berkeley DB version 1,
e5021521 670it can also be used with version 2, 3 or 4. In this case the interface is
1f70e1ea 671limited to the functionality provided by Berkeley DB 1.x. Anywhere the
e5021521 672version 2 or greater interface differs, B<DB_File> arranges for it to work
039d031f 673like version 1. This feature allows B<DB_File> scripts that were built
e5021521 674with version 1 to be migrated to version 2 or greater without any changes.
1f70e1ea
PM
675
676If you want to make use of the new features available in Berkeley DB
b90e71be 6772.x or greater, use the Perl module B<BerkeleyDB> instead.
1f70e1ea 678
e5021521
JH
679B<Note:> The database file format has changed multiple times in Berkeley
680DB version 2, 3 and 4. If you cannot recreate your databases, you
681must dump any existing databases with either the C<db_dump> or the
682C<db_dump185> utility that comes with Berkeley DB.
683Once you have rebuilt DB_File to use Berkeley DB version 2 or greater,
684your databases can be recreated using C<db_load>. Refer to the Berkeley DB
1f70e1ea
PM
685documentation for further details.
686
e5021521 687Please read L<"COPYRIGHT"> before using version 2.x or greater of Berkeley
039d031f 688DB with DB_File.
1f70e1ea 689
68dc0745 690=head2 Interface to Berkeley DB
3b35bae3
AD
691
692B<DB_File> allows access to Berkeley DB files using the tie() mechanism
8e07c86e
AD
693in Perl 5 (for full details, see L<perlfunc/tie()>). This facility
694allows B<DB_File> to access Berkeley DB files using either an
695associative array (for DB_HASH & DB_BTREE file types) or an ordinary
696array (for the DB_RECNO file type).
3b35bae3 697
88108326 698In addition to the tie() interface, it is also possible to access most
699of the functions provided in the Berkeley DB API directly.
f6b705ef 700See L<THE API INTERFACE>.
3b35bae3 701
88108326 702=head2 Opening a Berkeley DB Database File
3b35bae3 703
8e07c86e 704Berkeley DB uses the function dbopen() to open or create a database.
f6b705ef 705Here is the C prototype for dbopen():
3b35bae3
AD
706
707 DB*
708 dbopen (const char * file, int flags, int mode,
709 DBTYPE type, const void * openinfo)
710
711The parameter C<type> is an enumeration which specifies which of the 3
712interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used.
713Depending on which of these is actually chosen, the final parameter,
714I<openinfo> points to a data structure which allows tailoring of the
715specific interface method.
716
8e07c86e 717This interface is handled slightly differently in B<DB_File>. Here is
88108326 718an equivalent call using B<DB_File>:
3b35bae3 719
88108326 720 tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ;
3b35bae3 721
8e07c86e
AD
722The C<filename>, C<flags> and C<mode> parameters are the direct
723equivalent of their dbopen() counterparts. The final parameter $DB_HASH
724performs the function of both the C<type> and C<openinfo> parameters in
725dbopen().
3b35bae3 726
88108326 727In the example above $DB_HASH is actually a pre-defined reference to a
728hash object. B<DB_File> has three of these pre-defined references.
729Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO.
3b35bae3 730
8e07c86e
AD
731The keys allowed in each of these pre-defined references is limited to
732the names used in the equivalent C structure. So, for example, the
733$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>,
88108326 734C<ffactor>, C<hash>, C<lorder> and C<nelem>.
735
736To change one of these elements, just assign to it like this:
737
738 $DB_HASH->{'cachesize'} = 10000 ;
739
740The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are
741usually adequate for most applications. If you do need to create extra
742instances of these objects, constructors are available for each file
743type.
744
745Here are examples of the constructors and the valid options available
746for DB_HASH, DB_BTREE and DB_RECNO respectively.
747
748 $a = new DB_File::HASHINFO ;
749 $a->{'bsize'} ;
750 $a->{'cachesize'} ;
751 $a->{'ffactor'};
752 $a->{'hash'} ;
753 $a->{'lorder'} ;
754 $a->{'nelem'} ;
755
756 $b = new DB_File::BTREEINFO ;
757 $b->{'flags'} ;
758 $b->{'cachesize'} ;
759 $b->{'maxkeypage'} ;
760 $b->{'minkeypage'} ;
761 $b->{'psize'} ;
762 $b->{'compare'} ;
763 $b->{'prefix'} ;
764 $b->{'lorder'} ;
765
766 $c = new DB_File::RECNOINFO ;
767 $c->{'bval'} ;
768 $c->{'cachesize'} ;
769 $c->{'psize'} ;
770 $c->{'flags'} ;
771 $c->{'lorder'} ;
772 $c->{'reclen'} ;
773 $c->{'bfname'} ;
774
The values stored in the hashes above are mostly the direct equivalent
of their C counterparts. Like their C counterparts, all are set to
default values - that means you don't have to set I<all> of the
values when you only want to change one. Here is an example:
779
780 $a = new DB_File::HASHINFO ;
781 $a->{'cachesize'} = 12345 ;
782 tie %y, 'DB_File', "filename", $flags, 0777, $a ;
783
36477c24 784A few of the options need extra discussion here. When used, the C
88108326 785equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers
786to C functions. In B<DB_File> these keys are used to store references
787to Perl subs. Below are templates for each of the subs:
788
789 sub hash
790 {
791 my ($data) = @_ ;
792 ...
793 # return the hash value for $data
794 return $hash ;
795 }
3b35bae3 796
    sub compare
    {
        my ($key1, $key2) = @_ ;
        ...
        # return  0 if $key1 eq $key2
        #        -1 if $key1 lt $key2
        #         1 if $key1 gt $key2
        return (-1 , 0 or 1) ;
    }
3b35bae3 806
    sub prefix
    {
        my ($key1, $key2) = @_ ;
        ...
        # return number of bytes of $key2 which are
        # necessary to determine that it is greater than $key1
        return $bytes ;
    }
3b35bae3 815
f6b705ef 816See L<Changing the BTREE sort order> for an example of using the
817C<compare> template.
88108326 818
36477c24 819If you are using the DB_RECNO interface and you intend making use of
9a2c4ce3 820C<bval>, you should check out L<The 'bval' Option>.
36477c24 821
88108326 822=head2 Default Parameters
823
824It is possible to omit some or all of the final 4 parameters in the
825call to C<tie> and let them take default values. As DB_HASH is the most
826common file format used, the call:
827
828 tie %A, "DB_File", "filename" ;
829
830is equivalent to:
831
18d2dc8c 832 tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 833
834It is also possible to omit the filename parameter as well, so the
835call:
836
837 tie %A, "DB_File" ;
838
839is equivalent to:
840
18d2dc8c 841 tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 842
f6b705ef 843See L<In Memory Databases> for a discussion on the use of C<undef>
88108326 844in place of a filename.
845
f6b705ef 846=head2 In Memory Databases
847
848Berkeley DB allows the creation of in-memory databases by using NULL
849(that is, a C<(char *)0> in C) in place of the filename. B<DB_File>
850uses C<undef> instead of NULL to provide this functionality.
851
852=head1 DB_HASH
853
854The DB_HASH file format is probably the most commonly used of the three
855file formats that B<DB_File> supports. It is also very straightforward
856to use.
857
68dc0745 858=head2 A Simple Example
f6b705ef 859
860This example shows how to create a database, add key/value pairs to the
861database, delete keys/value pairs and finally how to enumerate the
862contents of the database.
863
3245f058 864 use warnings ;
610ab055 865 use strict ;
f6b705ef 866 use DB_File ;
07200f1b 867 our (%h, $k, $v) ;
f6b705ef 868
2c2d71f5 869 unlink "fruit" ;
45a340cb 870 tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0666, $DB_HASH
f6b705ef 871 or die "Cannot open file 'fruit': $!\n";
872
873 # Add a few key/value pairs to the file
874 $h{"apple"} = "red" ;
875 $h{"orange"} = "orange" ;
876 $h{"banana"} = "yellow" ;
877 $h{"tomato"} = "red" ;
878
879 # Check for existence of a key
880 print "Banana Exists\n\n" if $h{"banana"} ;
881
882 # Delete a key/value pair.
883 delete $h{"apple"} ;
884
885 # print the contents of the file
886 while (($k, $v) = each %h)
887 { print "$k -> $v\n" }
888
889 untie %h ;
890
Here is the output:
892
893 Banana Exists
bbc7dcd2 894
f6b705ef 895 orange -> orange
896 tomato -> red
897 banana -> yellow
898
Note that, as with ordinary associative arrays, the keys are retrieved
in an apparently random order.
901
902=head1 DB_BTREE
903
904The DB_BTREE format is useful when you want to store data in a given
905order. By default the keys will be stored in lexical order, but as you
906will see from the example shown in the next section, it is very easy to
907define your own sorting function.
908
909=head2 Changing the BTREE sort order
910
911This script shows how to override the default sorting algorithm that
912BTREE uses. Instead of using the normal lexical ordering, a case
913insensitive compare function will be used.
88108326 914
3245f058 915 use warnings ;
610ab055 916 use strict ;
f6b705ef 917 use DB_File ;
610ab055
PM
918
919 my %h ;
f6b705ef 920
921 sub Compare
922 {
923 my ($key1, $key2) = @_ ;
924 "\L$key1" cmp "\L$key2" ;
925 }
926
927 # specify the Perl sub that will do the comparison
928 $DB_BTREE->{'compare'} = \&Compare ;
929
2c2d71f5 930 unlink "tree" ;
45a340cb 931 tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0666, $DB_BTREE
f6b705ef 932 or die "Cannot open file 'tree': $!\n" ;
933
934 # Add a key/value pair to the file
935 $h{'Wall'} = 'Larry' ;
936 $h{'Smith'} = 'John' ;
937 $h{'mouse'} = 'mickey' ;
938 $h{'duck'} = 'donald' ;
939
940 # Delete
941 delete $h{"duck"} ;
942
943 # Cycle through the keys printing them in order.
944 # Note it is not necessary to sort the keys as
945 # the btree will have kept them in order automatically.
946 foreach (keys %h)
947 { print "$_\n" }
948
949 untie %h ;
950
951Here is the output from the code above.
952
953 mouse
954 Smith
955 Wall
956
There are a few points to bear in mind if you want to change the
ordering in a BTREE database:
959
960=over 5
961
962=item 1.
963
964The new compare function must be specified when you create the database.
965
966=item 2.
967
968You cannot change the ordering once the database has been created. Thus
969you must use the same compare function every time you access the
88108326 970database.
971
39793c41
PM
=item 3.
973
974Duplicate keys are entirely defined by the comparison function.
975In the case-insensitive example above, the keys: 'KEY' and 'key'
976would be considered duplicates, and assigning to the second one
52ffee89 977would overwrite the first. If duplicates are allowed for (with the
59e51af5 978R_DUP flag discussed below), only a single copy of duplicate keys
39793c41
PM
979is stored in the database --- so (again with example above) assigning
980three values to the keys: 'KEY', 'Key', and 'key' would leave just
981the first key: 'KEY' in the database with three values. For some
982situations this results in information loss, so care should be taken
983to provide fully qualified comparison functions when necessary.
984For example, the above comparison routine could be modified to
985additionally compare case-sensitively if two keys are equal in the
986case insensitive comparison:
987
988 sub compare {
989 my($key1, $key2) = @_;
990 lc $key1 cmp lc $key2 ||
991 $key1 cmp $key2;
992 }
993
994And now you will only have duplicates when the keys themselves
995are truly the same. (note: in versions of the db library prior to
996about November 1996, such duplicate keys were retained so it was
997possible to recover the original keys in sets of keys that
998compared as equal).
999
1000
f6b705ef 1001=back
1002
68dc0745 1003=head2 Handling Duplicate Keys
f6b705ef 1004
1005The BTREE file type optionally allows a single key to be associated
1006with an arbitrary number of values. This option is enabled by setting
1007the flags element of C<$DB_BTREE> to R_DUP when creating the database.
1008
88108326 1009There are some difficulties in using the tied hash interface if you
1010want to manipulate a BTREE database with duplicate keys. Consider this
1011code:
1012
3245f058 1013 use warnings ;
610ab055 1014 use strict ;
88108326 1015 use DB_File ;
610ab055 1016
962cee9f 1017 my ($filename, %h) ;
610ab055 1018
88108326 1019 $filename = "tree" ;
1020 unlink $filename ;
bbc7dcd2 1021
88108326 1022 # Enable duplicate records
1023 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1024
45a340cb 1025 tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
88108326 1026 or die "Cannot open $filename: $!\n";
bbc7dcd2 1027
88108326 1028 # Add some key/value pairs to the file
1029 $h{'Wall'} = 'Larry' ;
1030 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 1031 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 1032 $h{'Smith'} = 'John' ;
1033 $h{'mouse'} = 'mickey' ;
1034
1035 # iterate through the associative array
1036 # and print each key/value pair.
2c2d71f5 1037 foreach (sort keys %h)
88108326 1038 { print "$_ -> $h{$_}\n" }
1039
f6b705ef 1040 untie %h ;
1041
88108326 1042Here is the output:
1043
1044 Smith -> John
1045 Wall -> Larry
1046 Wall -> Larry
f6b705ef 1047 Wall -> Larry
88108326 1048 mouse -> mickey
1049
f6b705ef 1050As you can see 3 records have been successfully created with key C<Wall>
88108326 1051- the only thing is, when they are retrieved from the database they
f6b705ef 1052I<seem> to have the same value, namely C<Larry>. The problem is caused
1053by the way that the associative array interface works. Basically, when
1054the associative array interface is used to fetch the value associated
1055with a given key, it will only ever retrieve the first value.
88108326 1056
1057Although it may not be immediately obvious from the code above, the
1058associative array interface can be used to write values with duplicate
1059keys, but it cannot be used to read them back from the database.
1060
1061The way to get around this problem is to use the Berkeley DB API method
1062called C<seq>. This method allows sequential access to key/value
f6b705ef 1063pairs. See L<THE API INTERFACE> for details of both the C<seq> method
1064and the API in general.
88108326 1065
1066Here is the script above rewritten using the C<seq> API method.
1067
3245f058 1068 use warnings ;
610ab055 1069 use strict ;
88108326 1070 use DB_File ;
bbc7dcd2 1071
962cee9f 1072 my ($filename, $x, %h, $status, $key, $value) ;
610ab055 1073
88108326 1074 $filename = "tree" ;
1075 unlink $filename ;
bbc7dcd2 1076
88108326 1077 # Enable duplicate records
1078 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1079
45a340cb 1080 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
88108326 1081 or die "Cannot open $filename: $!\n";
bbc7dcd2 1082
88108326 1083 # Add some key/value pairs to the file
1084 $h{'Wall'} = 'Larry' ;
1085 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 1086 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 1087 $h{'Smith'} = 'John' ;
1088 $h{'mouse'} = 'mickey' ;
bbc7dcd2 1089
f6b705ef 1090 # iterate through the btree using seq
88108326 1091 # and print each key/value pair.
610ab055 1092 $key = $value = 0 ;
f6b705ef 1093 for ($status = $x->seq($key, $value, R_FIRST) ;
1094 $status == 0 ;
1095 $status = $x->seq($key, $value, R_NEXT) )
88108326 1096 { print "$key -> $value\n" }
bbc7dcd2 1097
88108326 1098 undef $x ;
1099 untie %h ;
1100
1101that prints:
1102
1103 Smith -> John
1104 Wall -> Brick
f6b705ef 1105 Wall -> Brick
88108326 1106 Wall -> Larry
1107 mouse -> mickey
1108
f6b705ef 1109This time we have got all the key/value pairs, including the multiple
88108326 1110values associated with the key C<Wall>.
1111
6ca2e664
PM
1112To make life easier when dealing with duplicate keys, B<DB_File> comes with
1113a few utility methods.
1114
68dc0745 1115=head2 The get_dup() Method
f6b705ef 1116
6ca2e664 1117The C<get_dup> method assists in
88108326 1118reading duplicate values from BTREE databases. The method can take the
1119following forms:
1120
1121 $count = $x->get_dup($key) ;
1122 @list = $x->get_dup($key) ;
1123 %list = $x->get_dup($key, 1) ;
1124
1125In a scalar context the method returns the number of values associated
1126with the key, C<$key>.
1127
1128In list context, it returns all the values which match C<$key>. Note
f6b705ef 1129that the values will be returned in an apparently random order.
88108326 1130
7a2e2cd6 1131In list context, if the second parameter is present and evaluates
1132TRUE, the method returns an associative array. The keys of the
1133associative array correspond to the values that matched in the BTREE
1134and the values of the array are a count of the number of times that
1135particular value occurred in the BTREE.
88108326 1136
f6b705ef 1137So assuming the database created above, we can use C<get_dup> like
88108326 1138this:
1139
3245f058 1140 use warnings ;
2c2d71f5
JH
1141 use strict ;
1142 use DB_File ;
bbc7dcd2 1143
962cee9f 1144 my ($filename, $x, %h) ;
2c2d71f5
JH
1145
1146 $filename = "tree" ;
bbc7dcd2 1147
2c2d71f5
JH
1148 # Enable duplicate records
1149 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1150
45a340cb 1151 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
2c2d71f5
JH
1152 or die "Cannot open $filename: $!\n";
1153
610ab055 1154 my $cnt = $x->get_dup("Wall") ;
88108326 1155 print "Wall occurred $cnt times\n" ;
1156
610ab055 1157 my %hash = $x->get_dup("Wall", 1) ;
88108326 1158 print "Larry is there\n" if $hash{'Larry'} ;
f6b705ef 1159 print "There are $hash{'Brick'} Brick Walls\n" ;
88108326 1160
2c2d71f5 1161 my @list = sort $x->get_dup("Wall") ;
88108326 1162 print "Wall => [@list]\n" ;
1163
f6b705ef 1164 @list = $x->get_dup("Smith") ;
88108326 1165 print "Smith => [@list]\n" ;
bbc7dcd2 1166
f6b705ef 1167 @list = $x->get_dup("Dog") ;
88108326 1168 print "Dog => [@list]\n" ;
1169
1170
1171and it will print:
1172
f6b705ef 1173 Wall occurred 3 times
88108326 1174 Larry is there
f6b705ef 1175 There are 2 Brick Walls
1176 Wall => [Brick Brick Larry]
88108326 1177 Smith => [John]
1178 Dog => []
3b35bae3 1179
6ca2e664
PM
1180=head2 The find_dup() Method
1181
1182 $status = $X->find_dup($key, $value) ;
1183
b90e71be 1184This method checks for the existence of a specific key/value pair. If the
6ca2e664
PM
1185pair exists, the cursor is left pointing to the pair and the method
1186returns 0. Otherwise the method returns a non-zero value.
1187
1188Assuming the database from the previous example:
1189
3245f058 1190 use warnings ;
6ca2e664
PM
1191 use strict ;
1192 use DB_File ;
bbc7dcd2 1193
962cee9f 1194 my ($filename, $x, %h, $found) ;
6ca2e664 1195
07200f1b 1196 $filename = "tree" ;
bbc7dcd2 1197
6ca2e664
PM
1198 # Enable duplicate records
1199 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1200
45a340cb 1201 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
6ca2e664
PM
1202 or die "Cannot open $filename: $!\n";
1203
1204 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
1205 print "Larry Wall is $found there\n" ;
bbc7dcd2 1206
6ca2e664
PM
1207 $found = ( $x->find_dup("Wall", "Harry") == 0 ? "" : "not") ;
1208 print "Harry Wall is $found there\n" ;
bbc7dcd2 1209
6ca2e664
PM
1210 undef $x ;
1211 untie %h ;
1212
1213prints this
1214
2c2d71f5 1215 Larry Wall is there
6ca2e664
PM
1216 Harry Wall is not there
1217
1218
1219=head2 The del_dup() Method
1220
1221 $status = $X->del_dup($key, $value) ;
1222
1223This method deletes a specific key/value pair. It returns
12240 if they exist and have been deleted successfully.
1225Otherwise the method returns a non-zero value.
1226
b90e71be 1227Again assuming the existence of the C<tree> database
6ca2e664 1228
3245f058 1229 use warnings ;
6ca2e664
PM
1230 use strict ;
1231 use DB_File ;
bbc7dcd2 1232
962cee9f 1233 my ($filename, $x, %h, $found) ;
6ca2e664 1234
07200f1b 1235 $filename = "tree" ;
bbc7dcd2 1236
6ca2e664
PM
1237 # Enable duplicate records
1238 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1239
45a340cb 1240 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
6ca2e664
PM
1241 or die "Cannot open $filename: $!\n";
1242
1243 $x->del_dup("Wall", "Larry") ;
1244
1245 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
1246 print "Larry Wall is $found there\n" ;
bbc7dcd2 1247
6ca2e664
PM
1248 undef $x ;
1249 untie %h ;
1250
1251prints this
1252
1253 Larry Wall is not there
1254
f6b705ef 1255=head2 Matching Partial Keys
1256
1257The BTREE interface has a feature which allows partial keys to be
1258matched. This functionality is I<only> available when the C<seq> method
1259is used along with the R_CURSOR flag.
1260
1261 $x->seq($key, $value, R_CURSOR) ;
1262
1263Here is the relevant quote from the dbopen man page where it defines
1264the use of the R_CURSOR flag with seq:
1265
f6b705ef 1266 Note, for the DB_BTREE access method, the returned key is not
1267 necessarily an exact match for the specified key. The returned key
1268 is the smallest key greater than or equal to the specified key,
1269 permitting partial key matches and range searches.
1270
f6b705ef 1271In the example script below, the C<match> sub uses this feature to find
1272and print the first matching key/value pair given a partial key.
1273
3245f058 1274 use warnings ;
610ab055 1275 use strict ;
f6b705ef 1276 use DB_File ;
1277 use Fcntl ;
610ab055 1278
962cee9f 1279 my ($filename, $x, %h, $st, $key, $value) ;
f6b705ef 1280
1281 sub match
1282 {
1283 my $key = shift ;
610ab055 1284 my $value = 0;
f6b705ef 1285 my $orig_key = $key ;
1286 $x->seq($key, $value, R_CURSOR) ;
1287 print "$orig_key\t-> $key\t-> $value\n" ;
1288 }
1289
1290 $filename = "tree" ;
1291 unlink $filename ;
1292
45a340cb 1293 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
f6b705ef 1294 or die "Cannot open $filename: $!\n";
bbc7dcd2 1295
f6b705ef 1296 # Add some key/value pairs to the file
1297 $h{'mouse'} = 'mickey' ;
1298 $h{'Wall'} = 'Larry' ;
1299 $h{'Walls'} = 'Brick' ;
1300 $h{'Smith'} = 'John' ;
bbc7dcd2 1301
f6b705ef 1302
610ab055 1303 $key = $value = 0 ;
f6b705ef 1304 print "IN ORDER\n" ;
1305 for ($st = $x->seq($key, $value, R_FIRST) ;
1306 $st == 0 ;
1307 $st = $x->seq($key, $value, R_NEXT) )
bbc7dcd2 1308
2c2d71f5 1309 { print "$key -> $value\n" }
bbc7dcd2 1310
f6b705ef 1311 print "\nPARTIAL MATCH\n" ;
1312
1313 match "Wa" ;
1314 match "A" ;
1315 match "a" ;
1316
1317 undef $x ;
1318 untie %h ;
1319
1320Here is the output:
1321
1322 IN ORDER
1323 Smith -> John
1324 Wall -> Larry
1325 Walls -> Brick
1326 mouse -> mickey
1327
1328 PARTIAL MATCH
1329 Wa -> Wall -> Larry
1330 A -> Smith -> John
1331 a -> mouse -> mickey
1332
1333=head1 DB_RECNO
1334
1335DB_RECNO provides an interface to flat text files. Both variable and
1336fixed length records are supported.
3b35bae3 1337
6ca2e664 1338In order to make RECNO more compatible with Perl, the array offset for
88108326 1339all RECNO arrays begins at 0 rather than 1 as in Berkeley DB.
3b35bae3 1340
88108326 1341As with normal Perl arrays, a RECNO array can be accessed using
1342negative indexes. The index -1 refers to the last element of the array,
1343-2 the second last, and so on. Attempting to access an element before
1344the start of the array will raise a fatal run-time error.
3b35bae3 1345
68dc0745 1346=head2 The 'bval' Option
36477c24 1347
1348The operation of the bval option warrants some discussion. Here is the
1349definition of bval from the Berkeley DB 1.85 recno manual page:
1350
1351 The delimiting byte to be used to mark the end of a
1352 record for variable-length records, and the pad charac-
1353 ter for fixed-length records. If no value is speci-
1354 fied, newlines (``\n'') are used to mark the end of
1355 variable-length records and fixed-length records are
1356 padded with spaces.
1357
1358The second sentence is wrong. In actual fact bval will only default to
1359C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL
1360openinfo parameter is used at all, the value that happens to be in bval
1361will be used. That means you always have to specify bval when making
1362use of any of the options in the openinfo parameter. This documentation
1363error will be fixed in the next release of Berkeley DB.
1364
1365That clarifies the situation with regard to Berkeley DB itself. What
1366about B<DB_File>? Well, the behavior defined in the quote above is
6ca2e664 1367quite useful, so B<DB_File> conforms to it.
36477c24 1368
1369That means that you can specify other options (e.g. cachesize) and
1370still have bval default to C<"\n"> for variable length records, and
1371space for fixed length records.
1372
c5da4faf 1373Also note that the bval option only allows you to specify a single byte
a6d6498e 1374as a delimiter.
c5da4faf 1375
f6b705ef 1376=head2 A Simple Example
3b35bae3 1377
6ca2e664
PM
1378Here is a simple example that uses RECNO (if you are using a version
1379of Perl earlier than 5.004_57 this example won't work -- see
1380L<Extra RECNO Methods> for a workaround).
f6b705ef 1381
3245f058 1382 use warnings ;
610ab055 1383 use strict ;
f6b705ef 1384 use DB_File ;
f6b705ef 1385
2c2d71f5
JH
1386 my $filename = "text" ;
1387 unlink $filename ;
1388
610ab055 1389 my @h ;
45a340cb 1390 tie @h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_RECNO
f6b705ef 1391 or die "Cannot open file 'text': $!\n" ;
1392
1393 # Add a few key/value pairs to the file
1394 $h[0] = "orange" ;
1395 $h[1] = "blue" ;
1396 $h[2] = "yellow" ;
1397
6ca2e664
PM
1398 push @h, "green", "black" ;
1399
1400 my $elements = scalar @h ;
1401 print "The array contains $elements entries\n" ;
1402
1403 my $last = pop @h ;
1404 print "popped $last\n" ;
1405
1406 unshift @h, "white" ;
1407 my $first = shift @h ;
1408 print "shifted $first\n" ;
1409
f6b705ef 1410 # Check for existence of a key
1411 print "Element 1 Exists with value $h[1]\n" if $h[1] ;
1412
1413 # use a negative index
1414 print "The last element is $h[-1]\n" ;
1415 print "The 2nd last element is $h[-2]\n" ;
1416
1417 untie @h ;
3b35bae3 1418
f6b705ef 1419Here is the output from the script:
1420
6ca2e664
PM
1421 The array contains 5 entries
1422 popped black
2c2d71f5 1423 shifted white
f6b705ef 1424 Element 1 Exists with value blue
6ca2e664
PM
1425 The last element is green
1426 The 2nd last element is yellow
f6b705ef 1427
6ca2e664 1428=head2 Extra RECNO Methods
f6b705ef 1429
045291aa 1430If you are using a version of Perl earlier than 5.004_57, the tied
6ca2e664
PM
1431array interface is quite limited. In the example script above
1432C<push>, C<pop>, C<shift>, C<unshift>
1433or determining the array length will not work with a tied array.
045291aa
PM
1434
1435To make the interface more useful for older versions of Perl, a number
1436of methods are supplied with B<DB_File> to simulate the missing array
1437operations. All these methods are accessed via the object returned from
1438the tie call.
f6b705ef 1439
1440Here are the methods:
1441
1442=over 5
3b35bae3 1443
f6b705ef 1444=item B<$X-E<gt>push(list) ;>
1445
1446Pushes the elements of C<list> to the end of the array.
1447
1448=item B<$value = $X-E<gt>pop ;>
1449
1450Removes and returns the last element of the array.
1451
1452=item B<$X-E<gt>shift>
1453
1454Removes and returns the first element of the array.
1455
1456=item B<$X-E<gt>unshift(list) ;>
1457
1458Pushes the elements of C<list> to the start of the array.
1459
1460=item B<$X-E<gt>length>
1461
1462Returns the number of elements in the array.
1463
c5da4faf
PM
1464=item B<$X-E<gt>splice(offset, length, elements);>
1465
a6d05634 1466Returns a splice of the array.
c5da4faf 1467
f6b705ef 1468=back
1469
1470=head2 Another Example
1471
1472Here is a more complete example that makes use of some of the methods
1473described above. It also makes use of the API interface directly (see
1474L<THE API INTERFACE>).
1475
3245f058 1476 use warnings ;
f6b705ef 1477 use strict ;
962cee9f 1478 my (@h, $H, $file, $i) ;
f6b705ef 1479 use DB_File ;
1480 use Fcntl ;
bbc7dcd2 1481
f6b705ef 1482 $file = "text" ;
1483
1484 unlink $file ;
1485
45a340cb 1486 $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0666, $DB_RECNO
f6b705ef 1487 or die "Cannot open file $file: $!\n" ;
bbc7dcd2 1488
f6b705ef 1489 # first create a text file to play with
1490 $h[0] = "zero" ;
1491 $h[1] = "one" ;
1492 $h[2] = "two" ;
1493 $h[3] = "three" ;
1494 $h[4] = "four" ;
1495
bbc7dcd2 1496
f6b705ef 1497 # Print the records in order.
1498 #
1499 # The length method is needed here because evaluating a tied
1500 # array in a scalar context does not return the number of
1501 # elements in the array.
1502
1503 print "\nORIGINAL\n" ;
1504 foreach $i (0 .. $H->length - 1) {
1505 print "$i: $h[$i]\n" ;
1506 }
1507
1508 # use the push & pop methods
1509 $a = $H->pop ;
1510 $H->push("last") ;
1511 print "\nThe last record was [$a]\n" ;
1512
1513 # and the shift & unshift methods
1514 $a = $H->shift ;
1515 $H->unshift("first") ;
1516 print "The first record was [$a]\n" ;
1517
1518 # Use the API to add a new record after record 2.
1519 $i = 2 ;
1520 $H->put($i, "Newbie", R_IAFTER) ;
1521
1522 # and a new record before record 1.
1523 $i = 1 ;
1524 $H->put($i, "New One", R_IBEFORE) ;
1525
1526 # delete record 3
1527 $H->del(3) ;
1528
1529 # now print the records in reverse order
1530 print "\nREVERSE\n" ;
1531 for ($i = $H->length - 1 ; $i >= 0 ; -- $i)
1532 { print "$i: $h[$i]\n" }
1533
1534 # same again, but use the API functions instead
1535 print "\nREVERSE again\n" ;
610ab055 1536 my ($s, $k, $v) = (0, 0, 0) ;
f6b705ef 1537 for ($s = $H->seq($k, $v, R_LAST) ;
1538 $s == 0 ;
1539 $s = $H->seq($k, $v, R_PREV))
1540 { print "$k: $v\n" }
1541
1542 undef $H ;
1543 untie @h ;
1544
1545and this is what it outputs:
1546
1547 ORIGINAL
1548 0: zero
1549 1: one
1550 2: two
1551 3: three
1552 4: four
1553
1554 The last record was [four]
1555 The first record was [zero]
1556
1557 REVERSE
1558 5: last
1559 4: three
1560 3: Newbie
1561 2: one
1562 1: New One
1563 0: first
1564
1565 REVERSE again
1566 5: last
1567 4: three
1568 3: Newbie
1569 2: one
1570 1: New One
1571 0: first
1572
1573Notes:
1574
1575=over 5
1576
1577=item 1.
1578
1579Rather than iterating through the array, C<@h> like this:
1580
1581 foreach $i (@h)
1582
1583it is necessary to use either this:
1584
1585 foreach $i (0 .. $H->length - 1)
1586
1587or this:
1588
1589 for ($a = $H->seq($k, $v, R_FIRST) ;
1590 $a == 0 ;
1591 $a = $H->seq($k, $v, R_NEXT) )
1592
1593=item 2.
1594
1595Notice that both times the C<put> method was used the record index was
1596specified using a variable, C<$i>, rather than the literal value
1597itself. This is because C<put> will return the record number of the
1598inserted line via that parameter.
1599
1600=back
1601
1602=head1 THE API INTERFACE
3b35bae3
AD
1603
1604As well as accessing Berkeley DB using a tied hash or array, it is also
88108326 1605possible to make direct use of most of the API functions defined in the
8e07c86e 1606Berkeley DB documentation.
3b35bae3 1607
88108326 1608To do this you need to store a copy of the object returned from the tie.
3b35bae3 1609
88108326 1610 $db = tie %hash, "DB_File", "filename" ;
3b35bae3 1611
8e07c86e 1612Once you have done that, you can access the Berkeley DB API functions
88108326 1613as B<DB_File> methods directly like this:
3b35bae3
AD
1614
1615 $db->put($key, $value, R_NOOVERWRITE) ;
1616
88108326 1617B<Important:> If you have saved a copy of the object returned from
1618C<tie>, the underlying database file will I<not> be closed until both
1619the tied variable is untied and all copies of the saved object are
610ab055 1620destroyed.
88108326 1621
1622 use DB_File ;
1623 $db = tie %hash, "DB_File", "filename"
1624 or die "Cannot tie filename: $!" ;
1625 ...
1626 undef $db ;
1627 untie %hash ;
1628
9a2c4ce3 1629See L<The untie() Gotcha> for more details.
778183f3 1630
88108326 1631All the functions defined in L<dbopen> are available except for
1632close() and dbopen() itself. The B<DB_File> method interface to the
1633supported functions has been implemented to mirror the way Berkeley DB
1634works whenever possible. In particular note that:
1635
1636=over 5
1637
1638=item *
1639
1640The methods return a status value. All return 0 on success.
1641All return -1 to signify an error and set C<$!> to the exact
1642error code. The return code 1 generally (but not always) means that the
1643key specified did not exist in the database.
1644
1645Other return codes are defined. See below and in the Berkeley DB
1646documentation for details. The Berkeley DB documentation should be used
1647as the definitive source.
1648
1649=item *
3b35bae3 1650
88108326 1651Whenever a Berkeley DB function returns data via one of its parameters,
1652the equivalent B<DB_File> method does exactly the same.
3b35bae3 1653
88108326 1654=item *
1655
1656If you are careful, it is possible to mix API calls with the tied
1657hash/array interface in the same piece of code. Although only a few of
1658the methods used to implement the tied interface currently make use of
1659the cursor, you should always assume that the cursor has been changed
1660any time the tied hash/array interface is used. As an example, this
1661code will probably not do what you expect:
1662
1663 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1664 or die "Cannot tie $filename: $!" ;
1665
1666 # Get the first key/value pair and set the cursor
1667 $X->seq($key, $value, R_FIRST) ;
1668
1669 # this line will modify the cursor
1670 $count = scalar keys %x ;
1671
1672 # Get the second key/value pair.
1673 # oops, it didn't, it got the last key/value pair!
1674 $X->seq($key, $value, R_NEXT) ;
1675
1676The code above can be rearranged to get around the problem, like this:
1677
1678 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1679 or die "Cannot tie $filename: $!" ;
1680
1681 # this line will modify the cursor
1682 $count = scalar keys %x ;
1683
1684 # Get the first key/value pair and set the cursor
1685 $X->seq($key, $value, R_FIRST) ;
1686
1687 # Get the second key/value pair.
1688 # worked this time.
1689 $X->seq($key, $value, R_NEXT) ;
1690
1691=back
1692
1693All the constants defined in L<dbopen> for use in the flags parameters
1694in the methods defined below are also available. Refer to the Berkeley
1695DB documentation for the precise meaning of the flags values.
1696
1697Below is a list of the methods available.
3b35bae3
AD
1698
1699=over 5
1700
f6b705ef 1701=item B<$status = $X-E<gt>get($key, $value [, $flags]) ;>
88108326 1702
1703Given a key (C<$key>) this method reads the value associated with it
1704from the database. The value read from the database is returned in the
1705C<$value> parameter.
3b35bae3 1706
88108326 1707If the key does not exist the method returns 1.
3b35bae3 1708
88108326 1709No flags are currently defined for this method.
3b35bae3 1710
f6b705ef 1711=item B<$status = $X-E<gt>put($key, $value [, $flags]) ;>
3b35bae3 1712
88108326 1713Stores the key/value pair in the database.
1714
1715If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter
8e07c86e 1716will have the record number of the inserted key/value pair set.
3b35bae3 1717
88108326 1718Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and
1719R_SETCURSOR.
1720
f6b705ef 1721=item B<$status = $X-E<gt>del($key [, $flags]) ;>
3b35bae3 1722
88108326 1723Removes all key/value pairs with key C<$key> from the database.
3b35bae3 1724
88108326 1725A return code of 1 means that the requested key was not in the
1726database.
3b35bae3 1727
88108326 1728R_CURSOR is the only valid flag at present.
3b35bae3 1729
f6b705ef 1730=item B<$status = $X-E<gt>fd ;>
3b35bae3 1731
88108326 1732Returns the file descriptor for the underlying database.
3b35bae3 1733
b90e71be
GS
1734See L<Locking: The Trouble with fd> for an explanation for why you should
1735not use C<fd> to lock your database.
3b35bae3 1736
f6b705ef 1737=item B<$status = $X-E<gt>seq($key, $value, $flags) ;>
3b35bae3 1738
88108326 1739This interface allows sequential retrieval from the database. See
1740L<dbopen> for full details.
1741
1742Both the C<$key> and C<$value> parameters will be set to the key/value
1743pair read from the database.
1744
1745The flags parameter is mandatory. The valid flag values are R_CURSOR,
1746R_FIRST, R_LAST, R_NEXT and R_PREV.
1747
f6b705ef 1748=item B<$status = $X-E<gt>sync([$flags]) ;>
88108326 1749
1750Flushes any cached buffers to disk.
1751
1752R_RECNOSYNC is the only valid flag at present.
3b35bae3
AD
1753
1754=back
1755
cad2e5aa
JH
1756=head1 DBM FILTERS
1757
1758A DBM Filter is a piece of code that is used when you I<always>
1759want to make the same transformation to all keys and/or values in a
1760DBM database.
1761
1762There are four methods associated with DBM Filters. All work identically,
1763and each is used to install (or uninstall) a single DBM Filter. Each
1764expects a single parameter, namely a reference to a sub. The only
1765difference between them is the place that the filter is installed.
1766
1767To summarise:
1768
1769=over 5
1770
1771=item B<filter_store_key>
1772
1773If a filter has been installed with this method, it will be invoked
1774every time you write a key to a DBM database.
1775
1776=item B<filter_store_value>
1777
1778If a filter has been installed with this method, it will be invoked
1779every time you write a value to a DBM database.
1780
1781
1782=item B<filter_fetch_key>
1783
1784If a filter has been installed with this method, it will be invoked
1785every time you read a key from a DBM database.
1786
1787=item B<filter_fetch_value>
1788
1789If a filter has been installed with this method, it will be invoked
1790every time you read a value from a DBM database.
1791
1792=back
1793
1794You can use any combination of the methods, from none, to all four.
1795
1796All filter methods return the existing filter, if present, or C<undef>
1797if not.
1798
1799To delete a filter pass C<undef> to it.
1800
1801=head2 The Filter
1802
1803When each filter is called by Perl, a local copy of C<$_> will contain
1804the key or value to be filtered. Filtering is achieved by modifying
1805the contents of C<$_>. The return code from the filter is ignored.
1806
1807=head2 An Example -- the NULL termination problem.
1808
1809Consider the following scenario. You have a DBM database
1810that you need to share with a third-party C application. The C application
1811assumes that I<all> keys and values are NULL terminated. Unfortunately
1812when Perl writes to DBM databases it doesn't use NULL termination, so
1813your Perl application will have to manage NULL termination itself. When
1814you write to the database you will have to use something like this:
1815
1816 $hash{"$key\0"} = "$value\0" ;
1817
1818Similarly the NULL needs to be taken into account when you are considering
1819the length of existing keys/values.
1820
1821It would be much better if you could ignore the NULL terminations issue
1822in the main application code and have a mechanism that automatically
1823added the terminating NULL to all keys and values whenever you write to
1824the database and have them removed when you read from the database. As I'm
1825sure you have already guessed, this is a problem that DBM Filters can
1826fix very easily.
1827
3245f058 1828 use warnings ;
cad2e5aa
JH
1829 use strict ;
1830 use DB_File ;
1831
1832 my %hash ;
2359510d 1833 my $filename = "filt" ;
cad2e5aa
JH
1834 unlink $filename ;
1835
1836 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1837 or die "Cannot open $filename: $!\n" ;
1838
1839 # Install DBM Filters
1840 $db->filter_fetch_key ( sub { s/\0$// } ) ;
1841 $db->filter_store_key ( sub { $_ .= "\0" } ) ;
1842 $db->filter_fetch_value( sub { s/\0$// } ) ;
1843 $db->filter_store_value( sub { $_ .= "\0" } ) ;
1844
1845 $hash{"abc"} = "def" ;
1846 my $a = $hash{"ABC"} ;
1847 # ...
1848 undef $db ;
1849 untie %hash ;
1850
1851Hopefully the contents of each of the filters should be
1852self-explanatory. Both "fetch" filters remove the terminating NULL,
1853and both "store" filters add a terminating NULL.
1854
1855
1856=head2 Another Example -- Key is a C int.
1857
1858Here is another real-life example. By default, whenever Perl writes to
1859a DBM database it always writes the key and value as strings. So when
1860you use this:
1861
3c4b39be 1862 $hash{12345} = "something" ;
cad2e5aa
JH
1863
1864the key 12345 will get stored in the DBM database as the 5 byte string
1865"12345". If you actually want the key to be stored in the DBM database
1866as a C int, you will have to use C<pack> when writing, and C<unpack>
1867when reading.
1868
1869Here is a DBM Filter that does it:
1870
3245f058 1871 use warnings ;
cad2e5aa
JH
1872 use strict ;
1873 use DB_File ;
1874 my %hash ;
2359510d 1875 my $filename = "filt" ;
cad2e5aa
JH
1876 unlink $filename ;
1877
1878
1879 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1880 or die "Cannot open $filename: $!\n" ;
1881
1882 $db->filter_fetch_key ( sub { $_ = unpack("i", $_) } ) ;
1883 $db->filter_store_key ( sub { $_ = pack ("i", $_) } ) ;
1884 $hash{123} = "def" ;
1885 # ...
1886 undef $db ;
1887 untie %hash ;
1888
1889This time only two filters have been used -- we only need to manipulate
1890the contents of the key, so it wasn't necessary to install any value
1891filters.
1892
f6b705ef 1893=head1 HINTS AND TIPS
3b35bae3 1894
3b35bae3 1895
b90e71be 1896=head2 Locking: The Trouble with fd
3b35bae3 1897
b90e71be
GS
1898Until version 1.72 of this module, the recommended technique for locking
1899B<DB_File> databases was to flock the filehandle returned from the "fd"
1900function. Unfortunately this technique has been shown to be fundamentally
1901flawed (Kudos to David Harris for tracking this down). Use it at your own
1902peril!
3b35bae3 1903
b90e71be 1904The locking technique went like this.
cb1a09d0 1905
2359510d
SD
1906 $db = tie(%db, 'DB_File', 'foo.db', O_CREAT|O_RDWR, 0644)
1907 || die "dbcreat foo.db $!";
b90e71be
GS
1908 $fd = $db->fd;
1909 open(DB_FH, "+<&=$fd") || die "dup $!";
1910 flock (DB_FH, LOCK_EX) || die "flock: $!";
1911 ...
1912 $db{"Tom"} = "Jerry" ;
1913 ...
1914 flock(DB_FH, LOCK_UN);
1915 undef $db;
1916 untie %db;
1917 close(DB_FH);
cb1a09d0 1918
b90e71be 1919In simple terms, this is what happens:
cb1a09d0 1920
b90e71be 1921=over 5
cb1a09d0 1922
b90e71be 1923=item 1.
cb1a09d0 1924
b90e71be 1925Use "tie" to open the database.
cb1a09d0 1926
b90e71be 1927=item 2.
cb1a09d0 1928
b90e71be 1929Lock the database with fd & flock.
cb1a09d0 1930
b90e71be 1931=item 3.
cb1a09d0 1932
b90e71be 1933Read & Write to the database.
cb1a09d0 1934
b90e71be 1935=item 4.
cb1a09d0 1936
b90e71be 1937Unlock and close the database.
cb1a09d0 1938
b90e71be
GS
1939=back
1940
1941Here is the crux of the problem. A side-effect of opening the B<DB_File>
1942database in step 1 is that an initial block from the database will get
1943read from disk and cached in memory.
1944
1945To see why this is a problem, consider what can happen when two processes,
1946say "A" and "B", both want to update the same B<DB_File> database
1947using the locking steps outlined above. Assume process "A" has already
1948opened the database and has a write lock, but it hasn't actually updated
1949the database yet (it has finished step 2, but not started step 3 yet). Now
1950process "B" tries to open the same database - step 1 will succeed,
1951but it will block on step 2 until process "A" releases the lock. The
1952important thing to notice here is that at this point in time both
1953processes will have cached identical initial blocks from the database.
1954
1955Now process "A" updates the database and happens to change some of the
1956data held in the initial buffer. Process "A" terminates, flushing
1957all cached data to disk and releasing the database lock. At this point
1958the database on disk will correctly reflect the changes made by process
1959"A".
1960
1961With the lock released, process "B" can now continue. It also updates the
1962database and unfortunately it too modifies the data that was in its
1963initial buffer. Once that data gets flushed to disk it will overwrite
1964some/all of the changes process "A" made to the database.
1965
1966The result of this scenario is at best a database that doesn't contain
1967what you expect. At worst the database will corrupt.
1968
1969The above won't happen every time competing processes update the same
1970B<DB_File> database, but it does illustrate why the technique should
1971not be used.
1972
1973=head2 Safe ways to lock a database
1974
1975Starting with version 2.x, Berkeley DB has internal support for locking.
1976The companion module to this one, B<BerkeleyDB>, provides an interface
1977to this locking functionality. If you are serious about locking
1978Berkeley DB databases, I strongly recommend using B<BerkeleyDB>.
1979
1980If using B<BerkeleyDB> isn't an option, there are a number of modules
1981available on CPAN that can be used to implement locking. Each one
1982implements locking differently and has different goals in mind. It is
1983therefore worth knowing the difference, so that you can pick the right
1984one for your application. Here are the three locking wrappers:
1985
1986=over 5
1987
1988=item B<Tie::DB_Lock>
1989
1990A B<DB_File> wrapper which creates copies of the database file for
1991read access, so that you have a kind of a multiversioning concurrent read
1992system. However, updates are still serial. Use for databases where reads
1993may be lengthy and consistency problems may occur.
1994
1995=item B<Tie::DB_LockFile>
1996
1997A B<DB_File> wrapper that has the ability to lock and unlock the database
1998while it is being used. Avoids the tie-before-flock problem by simply
1999re-tie-ing the database when you get or drop a lock. Because of the
2000flexibility in dropping and re-acquiring the lock in the middle of a
2001session, this can be massaged into a system that will work with long
2002updates and/or reads if the application follows the hints in the POD
2003documentation.
2004
2005=item B<DB_File::Lock>
2006
2007An extremely lightweight B<DB_File> wrapper that simply flocks a lockfile
2008before tie-ing the database and drops the lock after the untie. Allows
2009one to use the same lockfile for multiple databases to avoid deadlock
2010problems, if desired. Use for databases where updates and reads are
2011quick and simple flock locking semantics are enough.
2012
2013=back
cb1a09d0 2014
68dc0745 2015=head2 Sharing Databases With C Applications
f6b705ef 2016
2017There is no technical reason why a Berkeley DB database cannot be
2018shared by both a Perl and a C application.
2019
2020The vast majority of problems that are reported in this area boil down
2021to the fact that C strings are NULL terminated, whilst Perl strings are
cad2e5aa 2022not. See L<DBM FILTERS> for a generic way to work around this problem.
f6b705ef 2023
2024Here is a real example. Netscape 2.0 keeps a record of the locations you
2025visit along with the time you last visited them in a DB_HASH database.
2026This is usually stored in the file F<~/.netscape/history.db>. The key
2027field in the database is the location string and the value field is the
2028time the location was last visited stored as a 4 byte binary value.
2029
2030If you haven't already guessed, the location string is stored with a
2031terminating NULL. This means you need to be careful when accessing the
2032database.
2033
2034Here is a snippet of code that is loosely based on Tom Christiansen's
2035I<ggh> script (available from your nearest CPAN archive in
2036F<authors/id/TOMC/scripts/nshist.gz>).
2037
3245f058 2038 use warnings ;
610ab055 2039 use strict ;
f6b705ef 2040 use DB_File ;
2041 use Fcntl ;
f6b705ef 2042
962cee9f 2043 my ($dotdir, $HISTORY, %hist_db, $href, $binary_time, $date) ;
f6b705ef 2044 $dotdir = $ENV{HOME} || $ENV{LOGNAME};
2045
2046 $HISTORY = "$dotdir/.netscape/history.db";
2047
2048 tie %hist_db, 'DB_File', $HISTORY
2049 or die "Cannot open $HISTORY: $!\n" ;;
2050
2051 # Dump the complete database
2052 while ( ($href, $binary_time) = each %hist_db ) {
2053
2054 # remove the terminating NULL
2055 $href =~ s/\x00$// ;
2056
2057 # convert the binary time into a user friendly string
2058 $date = localtime unpack("V", $binary_time);
2059 print "$date $href\n" ;
2060 }
2061
2062 # check for the existence of a specific key
2063 # remember to add the NULL
2064 if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) {
2065 $date = localtime unpack("V", $binary_time) ;
2066 print "Last visited mox.perl.com on $date\n" ;
2067 }
2068 else {
2069 print "Never visited mox.perl.com\n"
2070 }
2071
2072 untie %hist_db ;
2073
68dc0745 2074=head2 The untie() Gotcha
778183f3 2075
7a2e2cd6 2076If you make use of the Berkeley DB API, it is I<very> strongly
68dc0745 2077recommended that you read L<perltie/The untie Gotcha>.
778183f3
PM
2078
2079Even if you don't currently make use of the API interface, it is still
2080worth reading it.
2081
2082Here is an example which illustrates the problem from a B<DB_File>
2083perspective:
2084
2085 use DB_File ;
2086 use Fcntl ;
2087
2088 my %x ;
2089 my $X ;
2090
2091 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC
2092 or die "Cannot tie first time: $!" ;
2093
2094 $x{123} = 456 ;
2095
2096 untie %x ;
2097
2098 tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
2099 or die "Cannot tie second time: $!" ;
2100
2101 untie %x ;
2102
2103When run, the script will produce this error message:
2104
2105 Cannot tie second time: Invalid argument at bad.file line 14.
2106
2107Although the error message above refers to the second tie() statement
2108in the script, the source of the problem is really with the untie()
2109statement that precedes it.
2110
2111Having read L<perltie> you will probably have already guessed that the
2112error is caused by the extra copy of the tied object stored in C<$X>.
2113If you haven't, then the problem boils down to the fact that the
2114B<DB_File> destructor, DESTROY, will not be called until I<all>
2115references to the tied object are destroyed. Both the tied variable,
2116C<%x>, and C<$X> above hold a reference to the object. The call to
2117untie() will destroy the first, but C<$X> still holds a valid
2118reference, so the destructor will not get called and the database file
2119F<tst.fil> will remain open. The fact that Berkeley DB then reports the
b90e71be 2120attempt to open a database that is already open via the catch-all
778183f3
PM
2121"Invalid argument" doesn't help.
2122
2123If you run the script with the C<-w> flag the error message becomes:
2124
2125 untie attempted while 1 inner references still exist at bad.file line 12.
2126 Cannot tie second time: Invalid argument at bad.file line 14.
2127
2128which pinpoints the real problem. Finally the script can now be
2129modified to fix the original problem by destroying the API object
2130before the untie:
2131
2132 ...
2133 $x{123} = 456 ;
2134
2135 undef $X ;
2136 untie %x ;
2137
2138 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
2139 ...
2140
f6b705ef 2141
2142=head1 COMMON QUESTIONS
2143
2144=head2 Why is there Perl source in my database?
2145
2146If you look at the contents of a database file created by DB_File,
2147there can sometimes be part of a Perl script included in it.
2148
2149This happens because Berkeley DB uses dynamic memory to allocate
2150buffers which will subsequently be written to the database file. Being
2151dynamic, the memory could have been used for anything before DB
2152malloced it. As Berkeley DB doesn't clear the memory once it has been
2153allocated, the unused portions will contain random junk. In the case
2154where a Perl script gets written to the database, the random junk will
2155correspond to an area of dynamic memory that happened to be used during
2156the compilation of the script.
2157
2158Unless you don't like the possibility of there being part of your Perl
2159scripts embedded in a database file, this is nothing to worry about.
2160
2161=head2 How do I store complex data structures with DB_File?
2162
2163Although B<DB_File> cannot do this directly, there is a module which
2164can layer transparently over B<DB_File> to accomplish this feat.
2165
2166Check out the MLDBM module, available on CPAN in the directory
2167F<modules/by-module/MLDBM>.
2168
2169=head2 What does "Invalid Argument" mean?
2170
2171You will get this error message when one of the parameters in the
2172C<tie> call is wrong. Unfortunately there are quite a few parameters to
2173get wrong, so it can be difficult to figure out which one it is.
2174
2175Here are a couple of possibilities:
2176
2177=over 5
2178
2179=item 1.
2180
610ab055 2181Attempting to reopen a database without closing it.
f6b705ef 2182
2183=item 2.
2184
2185Using the O_WRONLY flag.
2186
2187=back
2188
2189=head2 What does "Bareword 'DB_File' not allowed" mean?
2190
2191You will encounter this particular error message when you have the
2192C<strict 'subs'> pragma (or the full strict pragma) in your script.
2193Consider this script:
2194
3245f058 2195 use warnings ;
f6b705ef 2196 use strict ;
2197 use DB_File ;
07200f1b 2198 my %x ;
f6b705ef 2199 tie %x, DB_File, "filename" ;
2200
2201Running it produces the error in question:
2202
2203 Bareword "DB_File" not allowed while "strict subs" in use
2204
2205To get around the error, place the word C<DB_File> in either single or
2206double quotes, like this:
2207
2208 tie %x, "DB_File", "filename" ;
2209
2210Although it might seem like a real pain, it is really worth the effort
2211of having a C<use strict> in all your scripts.
2212
cad2e5aa
JH
2213=head1 REFERENCES
2214
2215Articles that are either about B<DB_File> or make use of it.
2216
2217=over 5
2218
2219=item 1.
2220
2221I<Full-Text Searching in Perl>, Tim Kientzle (tkientzle@ddj.com),
2222Dr. Dobb's Journal, Issue 295, January 1999, pp 34-41
2223
2224=back
2225
cb1a09d0
AD
2226=head1 HISTORY
2227
1f70e1ea 2228Moved to the Changes file.
610ab055 2229
1f70e1ea 2230=head1 BUGS
05475680 2231
1f70e1ea
PM
2232Some older versions of Berkeley DB had problems with fixed length
2233records using the RECNO file format. This problem has been fixed since
2234version 1.85 of Berkeley DB.
e858de61 2235
1f70e1ea
PM
2236I am sure there are bugs in the code. If you do find any, or can
2237suggest any enhancements, I would welcome your comments.
a6ed719b 2238
1f70e1ea 2239=head1 AVAILABILITY
a6ed719b 2240
1f70e1ea
PM
2241B<DB_File> comes with the standard Perl source distribution. Look in
2242the directory F<ext/DB_File>. Given the amount of time between releases
2243of Perl the version that ships with Perl is quite likely to be out of
2244date, so the most recent version can always be found on CPAN (see
5bbd4290 2245L<perlmodlib/CPAN> for details), in the directory
1f70e1ea 2246F<modules/by-module/DB_File>.
a6ed719b 2247
039d031f
PM
2248This version of B<DB_File> will work with either version 1.x, 2.x or
22493.x of Berkeley DB, but is limited to the functionality provided by
2250version 1.
a6ed719b 2251
cad2e5aa 2252The official web site for Berkeley DB is F<http://www.sleepycat.com>.
039d031f 2253All versions of Berkeley DB are available there.
93af7a87 2254
1f70e1ea
PM
2255Alternatively, Berkeley DB version 1 is available at your nearest CPAN
2256archive in F<src/misc/db.1.85.tar.gz>.
e858de61 2257
1f70e1ea
PM
2258If you are running IRIX, then get Berkeley DB version 1 from
2259F<http://reality.sgi.com/ariel>. It has the patches necessary to
2260compile properly on IRIX 5.3.
610ab055 2261
1f70e1ea 2262=head1 COPYRIGHT
3b35bae3 2263
dcdb9d1f 2264Copyright (c) 1995-2007 Paul Marquess. All rights reserved. This program
a9fd575d
PM
2265is free software; you can redistribute it and/or modify it under the
2266same terms as Perl itself.
3b35bae3 2267
1f70e1ea
PM
2268Although B<DB_File> is covered by the Perl license, the library it
2269makes use of, namely Berkeley DB, is not. Berkeley DB has its own
2270copyright and its own license. Please take the time to read it.
3b35bae3 2271
a9fd575d 2272Here are a few words taken from the Berkeley DB FAQ (at
b90e71be 2273F<http://www.sleepycat.com>) regarding the license:
68dc0745 2274
a9fd575d 2275 Do I have to license DB to use it in Perl scripts?
3b35bae3 2276
a9fd575d
PM
2277 No. The Berkeley DB license requires that software that uses
2278 Berkeley DB be freely redistributable. In the case of Perl, that
2279 software is Perl, and not your scripts. Any Perl scripts that you
2280 write are your property, including scripts that make use of
2281 Berkeley DB. Neither the Perl license nor the Berkeley DB license
2282 place any restriction on what you may do with them.
88108326 2283
1f70e1ea
PM
2284If you are in any doubt about the license situation, contact either the
2285Berkeley DB authors or the author of DB_File. See L<"AUTHOR"> for details.
a0b8c8c1
PM
2286
2287
3b35bae3
AD
2288=head1 SEE ALSO
2289
5bbd4290
PM
2290L<perl>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>,
2291L<perldbmfilter>
3b35bae3 2292
3b35bae3
AD
2293=head1 AUTHOR
2294
8e07c86e 2295The DB_File interface was written by Paul Marquess
5bbd4290 2296E<lt>pmqs@cpan.orgE<gt>.
d3ef3b8a 2297Questions about the DB system itself may be addressed to
5bbd4290 2298E<lt>db@sleepycat.comE<gt>.
3b35bae3
AD
2299
2300=cut