Commit | Line | Data |
---|---|---|
a0d0e21e LW |
1 | # DB_File.pm -- Perl 5 interface to Berkeley DB |
2 | # | |
b6990ae0 | 3 | # Written by Paul Marquess (pmqs@cpan.org) |
36477c24 | 4 | # |
b6990ae0 | 5 | # Copyright (c) 1995-2013 Paul Marquess. All rights reserved. |
36477c24 | 6 | # This program is free software; you can redistribute it and/or |
7 | # modify it under the same terms as Perl itself. | |
8 | ||
8e07c86e AD |
9 | |
10 | package DB_File::HASHINFO ; | |
785da04d | 11 | |
c1c1a1b2 | 12 | require 5.00504; |
610ab055 | 13 | |
3245f058 | 14 | use warnings; |
785da04d | 15 | use strict; |
8e07c86e | 16 | use Carp; |
88108326 | 17 | require Tie::Hash; |
18 | @DB_File::HASHINFO::ISA = qw(Tie::Hash); | |
8e07c86e | 19 | |
88108326 | 20 | sub new |
8e07c86e | 21 | { |
88108326 | 22 | my $pkg = shift ; |
23 | my %x ; | |
24 | tie %x, $pkg ; | |
25 | bless \%x, $pkg ; | |
8e07c86e AD |
26 | } |
27 | ||
610ab055 | 28 | |
88108326 | 29 | sub TIEHASH |
30 | { | |
31 | my $pkg = shift ; | |
32 | ||
efc79c7d PM |
33 | bless { VALID => { |
34 | bsize => 1, | |
35 | ffactor => 1, | |
36 | nelem => 1, | |
37 | cachesize => 1, | |
38 | hash => 2, | |
39 | lorder => 1, | |
36477c24 | 40 | }, |
41 | GOT => {} | |
42 | }, $pkg ; | |
88108326 | 43 | } |
8e07c86e | 44 | |
610ab055 | 45 | |
8e07c86e AD |
46 | sub FETCH |
47 | { | |
88108326 | 48 | my $self = shift ; |
49 | my $key = shift ; | |
8e07c86e | 50 | |
36477c24 | 51 | return $self->{GOT}{$key} if exists $self->{VALID}{$key} ; |
88108326 | 52 | |
53 | my $pkg = ref $self ; | |
54 | croak "${pkg}::FETCH - Unknown element '$key'" ; | |
8e07c86e AD |
55 | } |
56 | ||
57 | ||
58 | sub STORE | |
59 | { | |
88108326 | 60 | my $self = shift ; |
61 | my $key = shift ; | |
62 | my $value = shift ; | |
63 | ||
efc79c7d PM |
64 | my $type = $self->{VALID}{$key}; |
65 | ||
66 | if ( $type ) | |
8e07c86e | 67 | { |
efc79c7d PM |
68 | croak "Key '$key' not associated with a code reference" |
69 | if $type == 2 && !ref $value && ref $value ne 'CODE'; | |
36477c24 | 70 | $self->{GOT}{$key} = $value ; |
8e07c86e AD |
71 | return ; |
72 | } | |
73 | ||
88108326 | 74 | my $pkg = ref $self ; |
75 | croak "${pkg}::STORE - Unknown element '$key'" ; | |
8e07c86e AD |
76 | } |
77 | ||
78 | sub DELETE | |
79 | { | |
88108326 | 80 | my $self = shift ; |
81 | my $key = shift ; | |
82 | ||
36477c24 | 83 | if ( exists $self->{VALID}{$key} ) |
8e07c86e | 84 | { |
36477c24 | 85 | delete $self->{GOT}{$key} ; |
8e07c86e AD |
86 | return ; |
87 | } | |
88 | ||
88108326 | 89 | my $pkg = ref $self ; |
90 | croak "DB_File::HASHINFO::DELETE - Unknown element '$key'" ; | |
8e07c86e AD |
91 | } |
92 | ||
88108326 | 93 | sub EXISTS |
8e07c86e | 94 | { |
88108326 | 95 | my $self = shift ; |
96 | my $key = shift ; | |
8e07c86e | 97 | |
36477c24 | 98 | exists $self->{VALID}{$key} ; |
8e07c86e AD |
99 | } |
100 | ||
88108326 | 101 | sub NotHere |
8e07c86e | 102 | { |
18d2dc8c | 103 | my $self = shift ; |
88108326 | 104 | my $method = shift ; |
8e07c86e | 105 | |
18d2dc8c | 106 | croak ref($self) . " does not define the method ${method}" ; |
8e07c86e AD |
107 | } |
108 | ||
18d2dc8c PM |
109 | sub FIRSTKEY { my $self = shift ; $self->NotHere("FIRSTKEY") } |
110 | sub NEXTKEY { my $self = shift ; $self->NotHere("NEXTKEY") } | |
111 | sub CLEAR { my $self = shift ; $self->NotHere("CLEAR") } | |
8e07c86e AD |
112 | |
113 | package DB_File::RECNOINFO ; | |
785da04d | 114 | |
3245f058 | 115 | use warnings; |
88108326 | 116 | use strict ; |
117 | ||
045291aa | 118 | @DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ; |
8e07c86e AD |
119 | |
120 | sub TIEHASH | |
121 | { | |
88108326 | 122 | my $pkg = shift ; |
123 | ||
36477c24 | 124 | bless { VALID => { map {$_, 1} |
125 | qw( bval cachesize psize flags lorder reclen bfname ) | |
126 | }, | |
127 | GOT => {}, | |
128 | }, $pkg ; | |
8e07c86e AD |
129 | } |
130 | ||
88108326 | 131 | package DB_File::BTREEINFO ; |
8e07c86e | 132 | |
3245f058 | 133 | use warnings; |
88108326 | 134 | use strict ; |
8e07c86e | 135 | |
88108326 | 136 | @DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ; |
8e07c86e | 137 | |
88108326 | 138 | sub TIEHASH |
8e07c86e | 139 | { |
88108326 | 140 | my $pkg = shift ; |
141 | ||
efc79c7d PM |
142 | bless { VALID => { |
143 | flags => 1, | |
144 | cachesize => 1, | |
145 | maxkeypage => 1, | |
146 | minkeypage => 1, | |
147 | psize => 1, | |
148 | compare => 2, | |
149 | prefix => 2, | |
150 | lorder => 1, | |
36477c24 | 151 | }, |
152 | GOT => {}, | |
153 | }, $pkg ; | |
8e07c86e AD |
154 | } |
155 | ||
156 | ||
8e07c86e | 157 | package DB_File ; |
785da04d | 158 | |
3245f058 | 159 | use warnings; |
785da04d | 160 | use strict; |
07200f1b | 161 | our ($VERSION, @ISA, @EXPORT, $AUTOLOAD, $DB_BTREE, $DB_HASH, $DB_RECNO); |
ebf49c8b | 162 | our ($db_version, $use_XSLoader, $splice_end_array_no_length, $splice_end_array, $Error); |
8e07c86e AD |
163 | use Carp; |
164 | ||
785da04d | 165 | |
16f3356e | 166 | $VERSION = "1.831" ; |
083e9212 | 167 | $VERSION = eval $VERSION; # needed for dev releases |
d85a743d PM |
168 | |
169 | { | |
82c92bb0 | 170 | local $SIG{__WARN__} = sub {$splice_end_array_no_length = join(" ",@_);}; |
d85a743d | 171 | my @a =(1); splice(@a, 3); |
ebf49c8b CBW |
172 | $splice_end_array_no_length = |
173 | ($splice_end_array_no_length =~ /^splice\(\) offset past end of array at /); | |
174 | } | |
175 | { | |
82c92bb0 | 176 | local $SIG{__WARN__} = sub {$splice_end_array = join(" ", @_);}; |
ebf49c8b | 177 | my @a =(1); splice(@a, 3, 1); |
d85a743d PM |
178 | $splice_end_array = |
179 | ($splice_end_array =~ /^splice\(\) offset past end of array at /); | |
180 | } | |
8e07c86e AD |
181 | |
182 | #typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE; | |
88108326 | 183 | $DB_BTREE = new DB_File::BTREEINFO ; |
184 | $DB_HASH = new DB_File::HASHINFO ; | |
185 | $DB_RECNO = new DB_File::RECNOINFO ; | |
8e07c86e | 186 | |
785da04d | 187 | require Tie::Hash; |
8e07c86e | 188 | require Exporter; |
b90e71be GS |
189 | BEGIN { |
190 | $use_XSLoader = 1 ; | |
e5021521 | 191 | { local $SIG{__DIE__} ; eval { require XSLoader } ; } |
b90e71be GS |
192 | |
193 | if ($@) { | |
194 | $use_XSLoader = 0 ; | |
195 | require DynaLoader; | |
196 | @ISA = qw(DynaLoader); | |
197 | } | |
198 | } | |
199 | ||
200 | push @ISA, qw(Tie::Hash Exporter); | |
8e07c86e AD |
201 | @EXPORT = qw( |
202 | $DB_BTREE $DB_HASH $DB_RECNO | |
88108326 | 203 | |
8e07c86e AD |
204 | BTREEMAGIC |
205 | BTREEVERSION | |
206 | DB_LOCK | |
207 | DB_SHMEM | |
208 | DB_TXN | |
209 | HASHMAGIC | |
210 | HASHVERSION | |
211 | MAX_PAGE_NUMBER | |
212 | MAX_PAGE_OFFSET | |
213 | MAX_REC_NUMBER | |
214 | RET_ERROR | |
215 | RET_SPECIAL | |
216 | RET_SUCCESS | |
217 | R_CURSOR | |
218 | R_DUP | |
219 | R_FIRST | |
220 | R_FIXEDLEN | |
221 | R_IAFTER | |
222 | R_IBEFORE | |
223 | R_LAST | |
224 | R_NEXT | |
225 | R_NOKEY | |
226 | R_NOOVERWRITE | |
227 | R_PREV | |
228 | R_RECNOSYNC | |
229 | R_SETCURSOR | |
230 | R_SNAPSHOT | |
231 | __R_UNUSED | |
88108326 | 232 | |
045291aa | 233 | ); |
8e07c86e AD |
234 | |
235 | sub AUTOLOAD { | |
785da04d | 236 | my($constname); |
8e07c86e | 237 | ($constname = $AUTOLOAD) =~ s/.*:://; |
07200f1b PM |
238 | my ($error, $val) = constant($constname); |
239 | Carp::croak $error if $error; | |
57c77851 JS |
240 | no strict 'refs'; |
241 | *{$AUTOLOAD} = sub { $val }; | |
242 | goto &{$AUTOLOAD}; | |
07200f1b | 243 | } |
8e07c86e | 244 | |
f6b705ef | 245 | |
a6ed719b | 246 | eval { |
1f70e1ea PM |
247 | # Make all Fcntl O_XXX constants available for importing |
248 | require Fcntl; | |
249 | my @O = grep /^O_/, @Fcntl::EXPORT; | |
250 | Fcntl->import(@O); # first we import what we want to export | |
251 | push(@EXPORT, @O); | |
a6ed719b | 252 | }; |
f6b705ef | 253 | |
b90e71be GS |
254 | if ($use_XSLoader) |
255 | { XSLoader::load("DB_File", $VERSION)} | |
256 | else | |
257 | { bootstrap DB_File $VERSION } | |
8e07c86e | 258 | |
05475680 | 259 | sub tie_hash_or_array |
610ab055 PM |
260 | { |
261 | my (@arg) = @_ ; | |
05475680 | 262 | my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ; |
610ab055 | 263 | |
f1aa208b RGS |
264 | use File::Spec; |
265 | $arg[1] = File::Spec->rel2abs($arg[1]) | |
266 | if defined $arg[1] ; | |
267 | ||
610ab055 PM |
268 | $arg[4] = tied %{ $arg[4] } |
269 | if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ; | |
270 | ||
efc79c7d PM |
271 | $arg[2] = O_CREAT()|O_RDWR() if @arg >=3 && ! defined $arg[2]; |
272 | $arg[3] = 0666 if @arg >=4 && ! defined $arg[3]; | |
273 | ||
9c095db2 PM |
274 | # make recno in Berkeley DB version 2 (or better) work like |
275 | # recno in version 1. | |
d6067fe3 SP |
276 | if ($db_version >= 4 and ! $tieHASH) { |
277 | $arg[2] |= O_CREAT(); | |
278 | } | |
279 | ||
1f70e1ea PM |
280 | if ($db_version > 1 and defined $arg[4] and $arg[4] =~ /RECNO/ and |
281 | $arg[1] and ! -e $arg[1]) { | |
282 | open(FH, ">$arg[1]") or return undef ; | |
283 | close FH ; | |
284 | chmod $arg[3] ? $arg[3] : 0666 , $arg[1] ; | |
285 | } | |
286 | ||
05475680 | 287 | DoTie_($tieHASH, @arg) ; |
610ab055 PM |
288 | } |
289 | ||
05475680 PM |
290 | sub TIEHASH |
291 | { | |
292 | tie_hash_or_array(@_) ; | |
293 | } | |
294 | ||
295 | sub TIEARRAY | |
296 | { | |
297 | tie_hash_or_array(@_) ; | |
298 | } | |
88108326 | 299 | |
045291aa PM |
300 | sub CLEAR |
301 | { | |
1f70e1ea | 302 | my $self = shift; |
3245f058 | 303 | my $key = 0 ; |
1f70e1ea PM |
304 | my $value = "" ; |
305 | my $status = $self->seq($key, $value, R_FIRST()); | |
306 | my @keys; | |
307 | ||
308 | while ($status == 0) { | |
309 | push @keys, $key; | |
310 | $status = $self->seq($key, $value, R_NEXT()); | |
311 | } | |
312 | foreach $key (reverse @keys) { | |
313 | my $s = $self->del($key); | |
314 | } | |
315 | } | |
316 | ||
045291aa PM |
317 | sub EXTEND { } |
318 | ||
319 | sub STORESIZE | |
320 | { | |
321 | my $self = shift; | |
322 | my $length = shift ; | |
323 | my $current_length = $self->length() ; | |
324 | ||
325 | if ($length < $current_length) { | |
326 | my $key ; | |
327 | for ($key = $current_length - 1 ; $key >= $length ; -- $key) | |
328 | { $self->del($key) } | |
329 | } | |
a9fd575d PM |
330 | elsif ($length > $current_length) { |
331 | $self->put($length-1, "") ; | |
332 | } | |
045291aa PM |
333 | } |
334 | ||
c5da4faf PM |
335 | |
336 | sub SPLICE | |
337 | { | |
338 | my $self = shift; | |
339 | my $offset = shift; | |
340 | if (not defined $offset) { | |
d85a743d | 341 | warnings::warnif('uninitialized', 'Use of uninitialized value in splice'); |
c5da4faf PM |
342 | $offset = 0; |
343 | } | |
344 | ||
ebf49c8b | 345 | my $has_length = @_; |
c5da4faf PM |
346 | my $length = @_ ? shift : 0; |
347 | # Carping about definedness comes _after_ the OFFSET sanity check. | |
348 | # This is so we get the same error messages as Perl's splice(). | |
349 | # | |
350 | ||
351 | my @list = @_; | |
352 | ||
353 | my $size = $self->FETCHSIZE(); | |
354 | ||
355 | # 'If OFFSET is negative then it start that far from the end of | |
356 | # the array.' | |
357 | # | |
358 | if ($offset < 0) { | |
359 | my $new_offset = $size + $offset; | |
360 | if ($new_offset < 0) { | |
361 | die "Modification of non-creatable array value attempted, " | |
362 | . "subscript $offset"; | |
363 | } | |
364 | $offset = $new_offset; | |
365 | } | |
366 | ||
c5da4faf | 367 | if (not defined $length) { |
d85a743d | 368 | warnings::warnif('uninitialized', 'Use of uninitialized value in splice'); |
c5da4faf PM |
369 | $length = 0; |
370 | } | |
371 | ||
d85a743d PM |
372 | if ($offset > $size) { |
373 | $offset = $size; | |
374 | warnings::warnif('misc', 'splice() offset past end of array') | |
ebf49c8b | 375 | if $has_length ? $splice_end_array : $splice_end_array_no_length; |
d85a743d PM |
376 | } |
377 | ||
c5da4faf PM |
378 | # 'If LENGTH is omitted, removes everything from OFFSET onward.' |
379 | if (not defined $length) { | |
380 | $length = $size - $offset; | |
381 | } | |
382 | ||
383 | # 'If LENGTH is negative, leave that many elements off the end of | |
384 | # the array.' | |
385 | # | |
386 | if ($length < 0) { | |
387 | $length = $size - $offset + $length; | |
388 | ||
389 | if ($length < 0) { | |
390 | # The user must have specified a length bigger than the | |
391 | # length of the array passed in. But perl's splice() | |
392 | # doesn't catch this, it just behaves as for length=0. | |
393 | # | |
394 | $length = 0; | |
395 | } | |
396 | } | |
397 | ||
398 | if ($length > $size - $offset) { | |
399 | $length = $size - $offset; | |
400 | } | |
401 | ||
402 | # $num_elems holds the current number of elements in the database. | |
403 | my $num_elems = $size; | |
404 | ||
405 | # 'Removes the elements designated by OFFSET and LENGTH from an | |
406 | # array,'... | |
407 | # | |
408 | my @removed = (); | |
409 | foreach (0 .. $length - 1) { | |
410 | my $old; | |
411 | my $status = $self->get($offset, $old); | |
412 | if ($status != 0) { | |
413 | my $msg = "error from Berkeley DB on get($offset, \$old)"; | |
414 | if ($status == 1) { | |
415 | $msg .= ' (no such element?)'; | |
416 | } | |
417 | else { | |
418 | $msg .= ": error status $status"; | |
419 | if (defined $! and $! ne '') { | |
420 | $msg .= ", message $!"; | |
421 | } | |
422 | } | |
423 | die $msg; | |
424 | } | |
425 | push @removed, $old; | |
426 | ||
427 | $status = $self->del($offset); | |
428 | if ($status != 0) { | |
429 | my $msg = "error from Berkeley DB on del($offset)"; | |
430 | if ($status == 1) { | |
431 | $msg .= ' (no such element?)'; | |
432 | } | |
433 | else { | |
434 | $msg .= ": error status $status"; | |
435 | if (defined $! and $! ne '') { | |
436 | $msg .= ", message $!"; | |
437 | } | |
438 | } | |
439 | die $msg; | |
440 | } | |
441 | ||
442 | -- $num_elems; | |
443 | } | |
444 | ||
445 | # ...'and replaces them with the elements of LIST, if any.' | |
446 | my $pos = $offset; | |
447 | while (defined (my $elem = shift @list)) { | |
448 | my $old_pos = $pos; | |
449 | my $status; | |
450 | if ($pos >= $num_elems) { | |
451 | $status = $self->put($pos, $elem); | |
452 | } | |
453 | else { | |
454 | $status = $self->put($pos, $elem, $self->R_IBEFORE); | |
455 | } | |
456 | ||
457 | if ($status != 0) { | |
458 | my $msg = "error from Berkeley DB on put($pos, $elem, ...)"; | |
459 | if ($status == 1) { | |
460 | $msg .= ' (no such element?)'; | |
461 | } | |
462 | else { | |
463 | $msg .= ", error status $status"; | |
464 | if (defined $! and $! ne '') { | |
465 | $msg .= ", message $!"; | |
466 | } | |
467 | } | |
468 | die $msg; | |
469 | } | |
470 | ||
471 | die "pos unexpectedly changed from $old_pos to $pos with R_IBEFORE" | |
472 | if $old_pos != $pos; | |
473 | ||
474 | ++ $pos; | |
475 | ++ $num_elems; | |
476 | } | |
477 | ||
478 | if (wantarray) { | |
479 | # 'In list context, returns the elements removed from the | |
480 | # array.' | |
481 | # | |
482 | return @removed; | |
483 | } | |
484 | elsif (defined wantarray and not wantarray) { | |
485 | # 'In scalar context, returns the last element removed, or | |
486 | # undef if no elements are removed.' | |
487 | # | |
488 | if (@removed) { | |
489 | my $last = pop @removed; | |
490 | return "$last"; | |
491 | } | |
492 | else { | |
493 | return undef; | |
494 | } | |
495 | } | |
496 | elsif (not defined wantarray) { | |
497 | # Void context | |
498 | } | |
499 | else { die } | |
500 | } | |
501 | sub ::DB_File::splice { &SPLICE } | |
502 | ||
6ca2e664 PM |
503 | sub find_dup |
504 | { | |
505 | croak "Usage: \$db->find_dup(key,value)\n" | |
506 | unless @_ == 3 ; | |
507 | ||
508 | my $db = shift ; | |
509 | my ($origkey, $value_wanted) = @_ ; | |
510 | my ($key, $value) = ($origkey, 0); | |
511 | my ($status) = 0 ; | |
512 | ||
513 | for ($status = $db->seq($key, $value, R_CURSOR() ) ; | |
514 | $status == 0 ; | |
515 | $status = $db->seq($key, $value, R_NEXT() ) ) { | |
516 | ||
517 | return 0 if $key eq $origkey and $value eq $value_wanted ; | |
518 | } | |
519 | ||
520 | return $status ; | |
521 | } | |
522 | ||
523 | sub del_dup | |
524 | { | |
525 | croak "Usage: \$db->del_dup(key,value)\n" | |
526 | unless @_ == 3 ; | |
527 | ||
528 | my $db = shift ; | |
529 | my ($key, $value) = @_ ; | |
530 | my ($status) = $db->find_dup($key, $value) ; | |
531 | return $status if $status != 0 ; | |
532 | ||
533 | $status = $db->del($key, R_CURSOR() ) ; | |
534 | return $status ; | |
535 | } | |
536 | ||
88108326 | 537 | sub get_dup |
538 | { | |
539 | croak "Usage: \$db->get_dup(key [,flag])\n" | |
540 | unless @_ == 2 or @_ == 3 ; | |
541 | ||
542 | my $db = shift ; | |
543 | my $key = shift ; | |
544 | my $flag = shift ; | |
f6b705ef | 545 | my $value = 0 ; |
88108326 | 546 | my $origkey = $key ; |
547 | my $wantarray = wantarray ; | |
f6b705ef | 548 | my %values = () ; |
88108326 | 549 | my @values = () ; |
550 | my $counter = 0 ; | |
f6b705ef | 551 | my $status = 0 ; |
88108326 | 552 | |
f6b705ef | 553 | # iterate through the database until either EOF ($status == 0) |
554 | # or a different key is encountered ($key ne $origkey). | |
555 | for ($status = $db->seq($key, $value, R_CURSOR()) ; | |
556 | $status == 0 and $key eq $origkey ; | |
557 | $status = $db->seq($key, $value, R_NEXT()) ) { | |
88108326 | 558 | |
f6b705ef | 559 | # save the value or count number of matches |
560 | if ($wantarray) { | |
561 | if ($flag) | |
562 | { ++ $values{$value} } | |
563 | else | |
564 | { push (@values, $value) } | |
565 | } | |
566 | else | |
567 | { ++ $counter } | |
88108326 | 568 | |
88108326 | 569 | } |
570 | ||
f6b705ef | 571 | return ($wantarray ? ($flag ? %values : @values) : $counter) ; |
88108326 | 572 | } |
573 | ||
574 | ||
a30cae0b CBW |
575 | sub STORABLE_freeze |
576 | { | |
577 | my $type = ref shift; | |
578 | croak "Cannot freeze $type object\n"; | |
579 | } | |
580 | ||
581 | sub STORABLE_thaw | |
582 | { | |
583 | my $type = ref shift; | |
584 | croak "Cannot thaw $type object\n"; | |
585 | } | |
586 | ||
587 | ||
588 | ||
8e07c86e AD |
589 | 1; |
590 | __END__ | |
591 | ||
3b35bae3 AD |
592 | =head1 NAME |
593 | ||
1f70e1ea | 594 | DB_File - Perl5 access to Berkeley DB version 1.x |
3b35bae3 AD |
595 | |
596 | =head1 SYNOPSIS | |
597 | ||
bbc7dcd2 MS |
598 | use DB_File; |
599 | ||
88108326 | 600 | [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ; |
601 | [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ; | |
602 | [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ; | |
760ac839 | 603 | |
3b35bae3 AD |
604 | $status = $X->del($key [, $flags]) ; |
605 | $status = $X->put($key, $value [, $flags]) ; | |
606 | $status = $X->get($key, $value [, $flags]) ; | |
760ac839 | 607 | $status = $X->seq($key, $value, $flags) ; |
3b35bae3 AD |
608 | $status = $X->sync([$flags]) ; |
609 | $status = $X->fd ; | |
760ac839 | 610 | |
f6b705ef | 611 | # BTREE only |
88108326 | 612 | $count = $X->get_dup($key) ; |
613 | @list = $X->get_dup($key) ; | |
614 | %list = $X->get_dup($key, 1) ; | |
6ca2e664 PM |
615 | $status = $X->find_dup($key, $value) ; |
616 | $status = $X->del_dup($key, $value) ; | |
88108326 | 617 | |
f6b705ef | 618 | # RECNO only |
619 | $a = $X->length; | |
620 | $a = $X->pop ; | |
621 | $X->push(list); | |
622 | $a = $X->shift; | |
623 | $X->unshift(list); | |
c5da4faf | 624 | @r = $X->splice(offset, length, elements); |
f6b705ef | 625 | |
cad2e5aa JH |
626 | # DBM Filters |
627 | $old_filter = $db->filter_store_key ( sub { ... } ) ; | |
628 | $old_filter = $db->filter_store_value( sub { ... } ) ; | |
629 | $old_filter = $db->filter_fetch_key ( sub { ... } ) ; | |
630 | $old_filter = $db->filter_fetch_value( sub { ... } ) ; | |
631 | ||
3b35bae3 AD |
632 | untie %hash ; |
633 | untie @array ; | |
634 | ||
635 | =head1 DESCRIPTION | |
636 | ||
8e07c86e | 637 | B<DB_File> is a module which allows Perl programs to make use of the |
1f70e1ea | 638 | facilities provided by Berkeley DB version 1.x (if you have a newer |
0d735f06 | 639 | version of DB, see L<Using DB_File with Berkeley DB version 2 or greater>). |
039d031f PM |
640 | It is assumed that you have a copy of the Berkeley DB manual pages at |
641 | hand when reading this documentation. The interface defined here | |
642 | mirrors the Berkeley DB interface closely. | |
68dc0745 | 643 | |
8e07c86e AD |
644 | Berkeley DB is a C library which provides a consistent interface to a |
645 | number of database formats. B<DB_File> provides an interface to all | |
646 | three of the database types currently supported by Berkeley DB. | |
3b35bae3 AD |
647 | |
648 | The file types are: | |
649 | ||
650 | =over 5 | |
651 | ||
88108326 | 652 | =item B<DB_HASH> |
3b35bae3 | 653 | |
88108326 | 654 | This database type allows arbitrary key/value pairs to be stored in data |
8e07c86e AD |
655 | files. This is equivalent to the functionality provided by other |
656 | hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though, | |
657 | the files created using DB_HASH are not compatible with any of the | |
658 | other packages mentioned. | |
3b35bae3 | 659 | |
8e07c86e AD |
660 | A default hashing algorithm, which will be adequate for most |
661 | applications, is built into Berkeley DB. If you do need to use your own | |
662 | hashing algorithm it is possible to write your own in Perl and have | |
663 | B<DB_File> use it instead. | |
3b35bae3 | 664 | |
88108326 | 665 | =item B<DB_BTREE> |
666 | ||
667 | The btree format allows arbitrary key/value pairs to be stored in a | |
8e07c86e | 668 | sorted, balanced binary tree. |
3b35bae3 | 669 | |
8e07c86e AD |
670 | As with the DB_HASH format, it is possible to provide a user defined |
671 | Perl routine to perform the comparison of keys. By default, though, the | |
672 | keys are stored in lexical order. | |
3b35bae3 | 673 | |
88108326 | 674 | =item B<DB_RECNO> |
3b35bae3 | 675 | |
8e07c86e AD |
676 | DB_RECNO allows both fixed-length and variable-length flat text files |
677 | to be manipulated using the same key/value pair interface as in DB_HASH | |
678 | and DB_BTREE. In this case the key will consist of a record (line) | |
679 | number. | |
3b35bae3 AD |
680 | |
681 | =back | |
682 | ||
e5021521 | 683 | =head2 Using DB_File with Berkeley DB version 2 or greater |
1f70e1ea PM |
684 | |
685 | Although B<DB_File> is intended to be used with Berkeley DB version 1, | |
e5021521 | 686 | it can also be used with version 2, 3 or 4. In this case the interface is |
1f70e1ea | 687 | limited to the functionality provided by Berkeley DB 1.x. Anywhere the |
e5021521 | 688 | version 2 or greater interface differs, B<DB_File> arranges for it to work |
039d031f | 689 | like version 1. This feature allows B<DB_File> scripts that were built |
e5021521 | 690 | with version 1 to be migrated to version 2 or greater without any changes. |
1f70e1ea PM |
691 | |
692 | If you want to make use of the new features available in Berkeley DB | |
b90e71be | 693 | 2.x or greater, use the Perl module B<BerkeleyDB> instead. |
1f70e1ea | 694 | |
e5021521 JH |
695 | B<Note:> The database file format has changed multiple times in Berkeley |
696 | DB version 2, 3 and 4. If you cannot recreate your databases, you | |
697 | must dump any existing databases with either the C<db_dump> or the | |
698 | C<db_dump185> utility that comes with Berkeley DB. | |
699 | Once you have rebuilt DB_File to use Berkeley DB version 2 or greater, | |
700 | your databases can be recreated using C<db_load>. Refer to the Berkeley DB | |
1f70e1ea PM |
701 | documentation for further details. |
702 | ||
e5021521 | 703 | Please read L<"COPYRIGHT"> before using version 2.x or greater of Berkeley |
039d031f | 704 | DB with DB_File. |
1f70e1ea | 705 | |
68dc0745 | 706 | =head2 Interface to Berkeley DB |
3b35bae3 AD |
707 | |
708 | B<DB_File> allows access to Berkeley DB files using the tie() mechanism | |
8e07c86e AD |
709 | in Perl 5 (for full details, see L<perlfunc/tie()>). This facility |
710 | allows B<DB_File> to access Berkeley DB files using either an | |
711 | associative array (for DB_HASH & DB_BTREE file types) or an ordinary | |
712 | array (for the DB_RECNO file type). | |
3b35bae3 | 713 | |
88108326 | 714 | In addition to the tie() interface, it is also possible to access most |
715 | of the functions provided in the Berkeley DB API directly. | |
f6b705ef | 716 | See L<THE API INTERFACE>. |
3b35bae3 | 717 | |
88108326 | 718 | =head2 Opening a Berkeley DB Database File |
3b35bae3 | 719 | |
8e07c86e | 720 | Berkeley DB uses the function dbopen() to open or create a database. |
f6b705ef | 721 | Here is the C prototype for dbopen(): |
3b35bae3 AD |
722 | |
723 | DB* | |
724 | dbopen (const char * file, int flags, int mode, | |
725 | DBTYPE type, const void * openinfo) | |
726 | ||
727 | The parameter C<type> is an enumeration which specifies which of the 3 | |
728 | interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used. | |
729 | Depending on which of these is actually chosen, the final parameter, | |
730 | I<openinfo> points to a data structure which allows tailoring of the | |
731 | specific interface method. | |
732 | ||
8e07c86e | 733 | This interface is handled slightly differently in B<DB_File>. Here is |
88108326 | 734 | an equivalent call using B<DB_File>: |
3b35bae3 | 735 | |
88108326 | 736 | tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ; |
3b35bae3 | 737 | |
8e07c86e AD |
738 | The C<filename>, C<flags> and C<mode> parameters are the direct |
739 | equivalent of their dbopen() counterparts. The final parameter $DB_HASH | |
740 | performs the function of both the C<type> and C<openinfo> parameters in | |
741 | dbopen(). | |
3b35bae3 | 742 | |
88108326 | 743 | In the example above $DB_HASH is actually a pre-defined reference to a |
744 | hash object. B<DB_File> has three of these pre-defined references. | |
745 | Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO. | |
3b35bae3 | 746 | |
8e07c86e AD |
747 | The keys allowed in each of these pre-defined references is limited to |
748 | the names used in the equivalent C structure. So, for example, the | |
749 | $DB_HASH reference will only allow keys called C<bsize>, C<cachesize>, | |
88108326 | 750 | C<ffactor>, C<hash>, C<lorder> and C<nelem>. |
751 | ||
752 | To change one of these elements, just assign to it like this: | |
753 | ||
754 | $DB_HASH->{'cachesize'} = 10000 ; | |
755 | ||
756 | The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are | |
757 | usually adequate for most applications. If you do need to create extra | |
758 | instances of these objects, constructors are available for each file | |
759 | type. | |
760 | ||
761 | Here are examples of the constructors and the valid options available | |
762 | for DB_HASH, DB_BTREE and DB_RECNO respectively. | |
763 | ||
764 | $a = new DB_File::HASHINFO ; | |
765 | $a->{'bsize'} ; | |
766 | $a->{'cachesize'} ; | |
767 | $a->{'ffactor'}; | |
768 | $a->{'hash'} ; | |
769 | $a->{'lorder'} ; | |
770 | $a->{'nelem'} ; | |
771 | ||
772 | $b = new DB_File::BTREEINFO ; | |
773 | $b->{'flags'} ; | |
774 | $b->{'cachesize'} ; | |
775 | $b->{'maxkeypage'} ; | |
776 | $b->{'minkeypage'} ; | |
777 | $b->{'psize'} ; | |
778 | $b->{'compare'} ; | |
779 | $b->{'prefix'} ; | |
780 | $b->{'lorder'} ; | |
781 | ||
782 | $c = new DB_File::RECNOINFO ; | |
783 | $c->{'bval'} ; | |
784 | $c->{'cachesize'} ; | |
785 | $c->{'psize'} ; | |
786 | $c->{'flags'} ; | |
787 | $c->{'lorder'} ; | |
788 | $c->{'reclen'} ; | |
789 | $c->{'bfname'} ; | |
790 | ||
791 | The values stored in the hashes above are mostly the direct equivalent | |
792 | of their C counterpart. Like their C counterparts, all are set to a | |
f6b705ef | 793 | default values - that means you don't have to set I<all> of the |
88108326 | 794 | values when you only want to change one. Here is an example: |
795 | ||
796 | $a = new DB_File::HASHINFO ; | |
797 | $a->{'cachesize'} = 12345 ; | |
798 | tie %y, 'DB_File', "filename", $flags, 0777, $a ; | |
799 | ||
36477c24 | 800 | A few of the options need extra discussion here. When used, the C |
88108326 | 801 | equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers |
802 | to C functions. In B<DB_File> these keys are used to store references | |
803 | to Perl subs. Below are templates for each of the subs: | |
804 | ||
805 | sub hash | |
806 | { | |
807 | my ($data) = @_ ; | |
808 | ... | |
809 | # return the hash value for $data | |
810 | return $hash ; | |
811 | } | |
3b35bae3 | 812 | |
88108326 | 813 | sub compare |
814 | { | |
815 | my ($key, $key2) = @_ ; | |
816 | ... | |
817 | # return 0 if $key1 eq $key2 | |
818 | # -1 if $key1 lt $key2 | |
819 | # 1 if $key1 gt $key2 | |
820 | return (-1 , 0 or 1) ; | |
821 | } | |
3b35bae3 | 822 | |
88108326 | 823 | sub prefix |
824 | { | |
825 | my ($key, $key2) = @_ ; | |
826 | ... | |
827 | # return number of bytes of $key2 which are | |
828 | # necessary to determine that it is greater than $key1 | |
829 | return $bytes ; | |
830 | } | |
3b35bae3 | 831 | |
f6b705ef | 832 | See L<Changing the BTREE sort order> for an example of using the |
833 | C<compare> template. | |
88108326 | 834 | |
36477c24 | 835 | If you are using the DB_RECNO interface and you intend making use of |
9a2c4ce3 | 836 | C<bval>, you should check out L<The 'bval' Option>. |
36477c24 | 837 | |
88108326 | 838 | =head2 Default Parameters |
839 | ||
840 | It is possible to omit some or all of the final 4 parameters in the | |
841 | call to C<tie> and let them take default values. As DB_HASH is the most | |
842 | common file format used, the call: | |
843 | ||
844 | tie %A, "DB_File", "filename" ; | |
845 | ||
846 | is equivalent to: | |
847 | ||
18d2dc8c | 848 | tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ; |
88108326 | 849 | |
850 | It is also possible to omit the filename parameter as well, so the | |
851 | call: | |
852 | ||
853 | tie %A, "DB_File" ; | |
854 | ||
855 | is equivalent to: | |
856 | ||
18d2dc8c | 857 | tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ; |
88108326 | 858 | |
f6b705ef | 859 | See L<In Memory Databases> for a discussion on the use of C<undef> |
88108326 | 860 | in place of a filename. |
861 | ||
f6b705ef | 862 | =head2 In Memory Databases |
863 | ||
864 | Berkeley DB allows the creation of in-memory databases by using NULL | |
865 | (that is, a C<(char *)0> in C) in place of the filename. B<DB_File> | |
866 | uses C<undef> instead of NULL to provide this functionality. | |
867 | ||
868 | =head1 DB_HASH | |
869 | ||
870 | The DB_HASH file format is probably the most commonly used of the three | |
871 | file formats that B<DB_File> supports. It is also very straightforward | |
872 | to use. | |
873 | ||
68dc0745 | 874 | =head2 A Simple Example |
f6b705ef | 875 | |
876 | This example shows how to create a database, add key/value pairs to the | |
877 | database, delete keys/value pairs and finally how to enumerate the | |
878 | contents of the database. | |
879 | ||
3245f058 | 880 | use warnings ; |
610ab055 | 881 | use strict ; |
f6b705ef | 882 | use DB_File ; |
07200f1b | 883 | our (%h, $k, $v) ; |
f6b705ef | 884 | |
2c2d71f5 | 885 | unlink "fruit" ; |
45a340cb | 886 | tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0666, $DB_HASH |
f6b705ef | 887 | or die "Cannot open file 'fruit': $!\n"; |
888 | ||
889 | # Add a few key/value pairs to the file | |
890 | $h{"apple"} = "red" ; | |
891 | $h{"orange"} = "orange" ; | |
892 | $h{"banana"} = "yellow" ; | |
893 | $h{"tomato"} = "red" ; | |
894 | ||
895 | # Check for existence of a key | |
896 | print "Banana Exists\n\n" if $h{"banana"} ; | |
897 | ||
898 | # Delete a key/value pair. | |
899 | delete $h{"apple"} ; | |
900 | ||
901 | # print the contents of the file | |
902 | while (($k, $v) = each %h) | |
903 | { print "$k -> $v\n" } | |
904 | ||
905 | untie %h ; | |
906 | ||
907 | here is the output: | |
908 | ||
909 | Banana Exists | |
bbc7dcd2 | 910 | |
f6b705ef | 911 | orange -> orange |
912 | tomato -> red | |
913 | banana -> yellow | |
914 | ||
915 | Note that the like ordinary associative arrays, the order of the keys | |
916 | retrieved is in an apparently random order. | |
917 | ||
918 | =head1 DB_BTREE | |
919 | ||
920 | The DB_BTREE format is useful when you want to store data in a given | |
921 | order. By default the keys will be stored in lexical order, but as you | |
922 | will see from the example shown in the next section, it is very easy to | |
923 | define your own sorting function. | |
924 | ||
925 | =head2 Changing the BTREE sort order | |
926 | ||
927 | This script shows how to override the default sorting algorithm that | |
928 | BTREE uses. Instead of using the normal lexical ordering, a case | |
929 | insensitive compare function will be used. | |
88108326 | 930 | |
3245f058 | 931 | use warnings ; |
610ab055 | 932 | use strict ; |
f6b705ef | 933 | use DB_File ; |
610ab055 PM |
934 | |
935 | my %h ; | |
f6b705ef | 936 | |
937 | sub Compare | |
938 | { | |
939 | my ($key1, $key2) = @_ ; | |
940 | "\L$key1" cmp "\L$key2" ; | |
941 | } | |
942 | ||
943 | # specify the Perl sub that will do the comparison | |
944 | $DB_BTREE->{'compare'} = \&Compare ; | |
945 | ||
2c2d71f5 | 946 | unlink "tree" ; |
45a340cb | 947 | tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0666, $DB_BTREE |
f6b705ef | 948 | or die "Cannot open file 'tree': $!\n" ; |
949 | ||
950 | # Add a key/value pair to the file | |
951 | $h{'Wall'} = 'Larry' ; | |
952 | $h{'Smith'} = 'John' ; | |
953 | $h{'mouse'} = 'mickey' ; | |
954 | $h{'duck'} = 'donald' ; | |
955 | ||
956 | # Delete | |
957 | delete $h{"duck"} ; | |
958 | ||
959 | # Cycle through the keys printing them in order. | |
960 | # Note it is not necessary to sort the keys as | |
961 | # the btree will have kept them in order automatically. | |
962 | foreach (keys %h) | |
963 | { print "$_\n" } | |
964 | ||
965 | untie %h ; | |
966 | ||
967 | Here is the output from the code above. | |
968 | ||
969 | mouse | |
970 | Smith | |
971 | Wall | |
972 | ||
973 | There are a few point to bear in mind if you want to change the | |
974 | ordering in a BTREE database: | |
975 | ||
976 | =over 5 | |
977 | ||
978 | =item 1. | |
979 | ||
980 | The new compare function must be specified when you create the database. | |
981 | ||
982 | =item 2. | |
983 | ||
984 | You cannot change the ordering once the database has been created. Thus | |
985 | you must use the same compare function every time you access the | |
88108326 | 986 | database. |
987 | ||
39793c41 PM |
988 | =item 3 |
989 | ||
990 | Duplicate keys are entirely defined by the comparison function. | |
991 | In the case-insensitive example above, the keys: 'KEY' and 'key' | |
992 | would be considered duplicates, and assigning to the second one | |
52ffee89 | 993 | would overwrite the first. If duplicates are allowed for (with the |
59e51af5 | 994 | R_DUP flag discussed below), only a single copy of duplicate keys |
39793c41 PM |
995 | is stored in the database --- so (again with example above) assigning |
996 | three values to the keys: 'KEY', 'Key', and 'key' would leave just | |
997 | the first key: 'KEY' in the database with three values. For some | |
998 | situations this results in information loss, so care should be taken | |
999 | to provide fully qualified comparison functions when necessary. | |
1000 | For example, the above comparison routine could be modified to | |
1001 | additionally compare case-sensitively if two keys are equal in the | |
1002 | case insensitive comparison: | |
1003 | ||
1004 | sub compare { | |
1005 | my($key1, $key2) = @_; | |
1006 | lc $key1 cmp lc $key2 || | |
1007 | $key1 cmp $key2; | |
1008 | } | |
1009 | ||
1010 | And now you will only have duplicates when the keys themselves | |
1011 | are truly the same. (note: in versions of the db library prior to | |
1012 | about November 1996, such duplicate keys were retained so it was | |
1013 | possible to recover the original keys in sets of keys that | |
1014 | compared as equal). | |
1015 | ||
1016 | ||
f6b705ef | 1017 | =back |
1018 | ||
68dc0745 | 1019 | =head2 Handling Duplicate Keys |
f6b705ef | 1020 | |
1021 | The BTREE file type optionally allows a single key to be associated | |
1022 | with an arbitrary number of values. This option is enabled by setting | |
1023 | the flags element of C<$DB_BTREE> to R_DUP when creating the database. | |
1024 | ||
88108326 | 1025 | There are some difficulties in using the tied hash interface if you |
1026 | want to manipulate a BTREE database with duplicate keys. Consider this | |
1027 | code: | |
1028 | ||
3245f058 | 1029 | use warnings ; |
610ab055 | 1030 | use strict ; |
88108326 | 1031 | use DB_File ; |
610ab055 | 1032 | |
962cee9f | 1033 | my ($filename, %h) ; |
610ab055 | 1034 | |
88108326 | 1035 | $filename = "tree" ; |
1036 | unlink $filename ; | |
bbc7dcd2 | 1037 | |
88108326 | 1038 | # Enable duplicate records |
1039 | $DB_BTREE->{'flags'} = R_DUP ; | |
bbc7dcd2 | 1040 | |
45a340cb | 1041 | tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE |
88108326 | 1042 | or die "Cannot open $filename: $!\n"; |
bbc7dcd2 | 1043 | |
88108326 | 1044 | # Add some key/value pairs to the file |
1045 | $h{'Wall'} = 'Larry' ; | |
1046 | $h{'Wall'} = 'Brick' ; # Note the duplicate key | |
f6b705ef | 1047 | $h{'Wall'} = 'Brick' ; # Note the duplicate key and value |
88108326 | 1048 | $h{'Smith'} = 'John' ; |
1049 | $h{'mouse'} = 'mickey' ; | |
1050 | ||
1051 | # iterate through the associative array | |
1052 | # and print each key/value pair. | |
2c2d71f5 | 1053 | foreach (sort keys %h) |
88108326 | 1054 | { print "$_ -> $h{$_}\n" } |
1055 | ||
f6b705ef | 1056 | untie %h ; |
1057 | ||
88108326 | 1058 | Here is the output: |
1059 | ||
1060 | Smith -> John | |
1061 | Wall -> Larry | |
1062 | Wall -> Larry | |
f6b705ef | 1063 | Wall -> Larry |
88108326 | 1064 | mouse -> mickey |
1065 | ||
f6b705ef | 1066 | As you can see 3 records have been successfully created with key C<Wall> |
88108326 | 1067 | - the only thing is, when they are retrieved from the database they |
f6b705ef | 1068 | I<seem> to have the same value, namely C<Larry>. The problem is caused |
1069 | by the way that the associative array interface works. Basically, when | |
1070 | the associative array interface is used to fetch the value associated | |
1071 | with a given key, it will only ever retrieve the first value. | |
88108326 | 1072 | |
1073 | Although it may not be immediately obvious from the code above, the | |
1074 | associative array interface can be used to write values with duplicate | |
1075 | keys, but it cannot be used to read them back from the database. | |
1076 | ||
1077 | The way to get around this problem is to use the Berkeley DB API method | |
1078 | called C<seq>. This method allows sequential access to key/value | |
f6b705ef | 1079 | pairs. See L<THE API INTERFACE> for details of both the C<seq> method |
1080 | and the API in general. | |
88108326 | 1081 | |
1082 | Here is the script above rewritten using the C<seq> API method. | |
1083 | ||
3245f058 | 1084 | use warnings ; |
610ab055 | 1085 | use strict ; |
88108326 | 1086 | use DB_File ; |
bbc7dcd2 | 1087 | |
962cee9f | 1088 | my ($filename, $x, %h, $status, $key, $value) ; |
610ab055 | 1089 | |
88108326 | 1090 | $filename = "tree" ; |
1091 | unlink $filename ; | |
bbc7dcd2 | 1092 | |
88108326 | 1093 | # Enable duplicate records |
1094 | $DB_BTREE->{'flags'} = R_DUP ; | |
bbc7dcd2 | 1095 | |
45a340cb | 1096 | $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE |
88108326 | 1097 | or die "Cannot open $filename: $!\n"; |
bbc7dcd2 | 1098 | |
88108326 | 1099 | # Add some key/value pairs to the file |
1100 | $h{'Wall'} = 'Larry' ; | |
1101 | $h{'Wall'} = 'Brick' ; # Note the duplicate key | |
f6b705ef | 1102 | $h{'Wall'} = 'Brick' ; # Note the duplicate key and value |
88108326 | 1103 | $h{'Smith'} = 'John' ; |
1104 | $h{'mouse'} = 'mickey' ; | |
bbc7dcd2 | 1105 | |
f6b705ef | 1106 | # iterate through the btree using seq |
88108326 | 1107 | # and print each key/value pair. |
610ab055 | 1108 | $key = $value = 0 ; |
f6b705ef | 1109 | for ($status = $x->seq($key, $value, R_FIRST) ; |
1110 | $status == 0 ; | |
1111 | $status = $x->seq($key, $value, R_NEXT) ) | |
88108326 | 1112 | { print "$key -> $value\n" } |
bbc7dcd2 | 1113 | |
88108326 | 1114 | undef $x ; |
1115 | untie %h ; | |
1116 | ||
1117 | that prints: | |
1118 | ||
1119 | Smith -> John | |
1120 | Wall -> Brick | |
f6b705ef | 1121 | Wall -> Brick |
88108326 | 1122 | Wall -> Larry |
1123 | mouse -> mickey | |
1124 | ||
f6b705ef | 1125 | This time we have got all the key/value pairs, including the multiple |
88108326 | 1126 | values associated with the key C<Wall>. |
1127 | ||
6ca2e664 PM |
1128 | To make life easier when dealing with duplicate keys, B<DB_File> comes with |
1129 | a few utility methods. | |
1130 | ||
68dc0745 | 1131 | =head2 The get_dup() Method |
f6b705ef | 1132 | |
6ca2e664 | 1133 | The C<get_dup> method assists in |
88108326 | 1134 | reading duplicate values from BTREE databases. The method can take the |
1135 | following forms: | |
1136 | ||
1137 | $count = $x->get_dup($key) ; | |
1138 | @list = $x->get_dup($key) ; | |
1139 | %list = $x->get_dup($key, 1) ; | |
1140 | ||
1141 | In a scalar context the method returns the number of values associated | |
1142 | with the key, C<$key>. | |
1143 | ||
1144 | In list context, it returns all the values which match C<$key>. Note | |
f6b705ef | 1145 | that the values will be returned in an apparently random order. |
88108326 | 1146 | |
7a2e2cd6 | 1147 | In list context, if the second parameter is present and evaluates |
1148 | TRUE, the method returns an associative array. The keys of the | |
1149 | associative array correspond to the values that matched in the BTREE | |
1150 | and the values of the array are a count of the number of times that | |
1151 | particular value occurred in the BTREE. | |
88108326 | 1152 | |
f6b705ef | 1153 | So assuming the database created above, we can use C<get_dup> like |
88108326 | 1154 | this: |
1155 | ||
3245f058 | 1156 | use warnings ; |
2c2d71f5 JH |
1157 | use strict ; |
1158 | use DB_File ; | |
bbc7dcd2 | 1159 | |
962cee9f | 1160 | my ($filename, $x, %h) ; |
2c2d71f5 JH |
1161 | |
1162 | $filename = "tree" ; | |
bbc7dcd2 | 1163 | |
2c2d71f5 JH |
1164 | # Enable duplicate records |
1165 | $DB_BTREE->{'flags'} = R_DUP ; | |
bbc7dcd2 | 1166 | |
45a340cb | 1167 | $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE |
2c2d71f5 JH |
1168 | or die "Cannot open $filename: $!\n"; |
1169 | ||
610ab055 | 1170 | my $cnt = $x->get_dup("Wall") ; |
88108326 | 1171 | print "Wall occurred $cnt times\n" ; |
1172 | ||
610ab055 | 1173 | my %hash = $x->get_dup("Wall", 1) ; |
88108326 | 1174 | print "Larry is there\n" if $hash{'Larry'} ; |
f6b705ef | 1175 | print "There are $hash{'Brick'} Brick Walls\n" ; |
88108326 | 1176 | |
2c2d71f5 | 1177 | my @list = sort $x->get_dup("Wall") ; |
88108326 | 1178 | print "Wall => [@list]\n" ; |
1179 | ||
f6b705ef | 1180 | @list = $x->get_dup("Smith") ; |
88108326 | 1181 | print "Smith => [@list]\n" ; |
bbc7dcd2 | 1182 | |
f6b705ef | 1183 | @list = $x->get_dup("Dog") ; |
88108326 | 1184 | print "Dog => [@list]\n" ; |
1185 | ||
1186 | ||
1187 | and it will print: | |
1188 | ||
f6b705ef | 1189 | Wall occurred 3 times |
88108326 | 1190 | Larry is there |
f6b705ef | 1191 | There are 2 Brick Walls |
1192 | Wall => [Brick Brick Larry] | |
88108326 | 1193 | Smith => [John] |
1194 | Dog => [] | |
3b35bae3 | 1195 | |
6ca2e664 PM |
1196 | =head2 The find_dup() Method |
1197 | ||
1198 | $status = $X->find_dup($key, $value) ; | |
1199 | ||
b90e71be | 1200 | This method checks for the existence of a specific key/value pair. If the |
6ca2e664 PM |
1201 | pair exists, the cursor is left pointing to the pair and the method |
1202 | returns 0. Otherwise the method returns a non-zero value. | |
1203 | ||
1204 | Assuming the database from the previous example: | |
1205 | ||
3245f058 | 1206 | use warnings ; |
6ca2e664 PM |
1207 | use strict ; |
1208 | use DB_File ; | |
bbc7dcd2 | 1209 | |
962cee9f | 1210 | my ($filename, $x, %h, $found) ; |
6ca2e664 | 1211 | |
07200f1b | 1212 | $filename = "tree" ; |
bbc7dcd2 | 1213 | |
6ca2e664 PM |
1214 | # Enable duplicate records |
1215 | $DB_BTREE->{'flags'} = R_DUP ; | |
bbc7dcd2 | 1216 | |
45a340cb | 1217 | $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE |
6ca2e664 PM |
1218 | or die "Cannot open $filename: $!\n"; |
1219 | ||
1220 | $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ; | |
1221 | print "Larry Wall is $found there\n" ; | |
bbc7dcd2 | 1222 | |
6ca2e664 PM |
1223 | $found = ( $x->find_dup("Wall", "Harry") == 0 ? "" : "not") ; |
1224 | print "Harry Wall is $found there\n" ; | |
bbc7dcd2 | 1225 | |
6ca2e664 PM |
1226 | undef $x ; |
1227 | untie %h ; | |
1228 | ||
1229 | prints this | |
1230 | ||
2c2d71f5 | 1231 | Larry Wall is there |
6ca2e664 PM |
1232 | Harry Wall is not there |
1233 | ||
1234 | ||
1235 | =head2 The del_dup() Method | |
1236 | ||
1237 | $status = $X->del_dup($key, $value) ; | |
1238 | ||
1239 | This method deletes a specific key/value pair. It returns | |
1240 | 0 if they exist and have been deleted successfully. | |
1241 | Otherwise the method returns a non-zero value. | |
1242 | ||
b90e71be | 1243 | Again assuming the existence of the C<tree> database |
6ca2e664 | 1244 | |
3245f058 | 1245 | use warnings ; |
6ca2e664 PM |
1246 | use strict ; |
1247 | use DB_File ; | |
bbc7dcd2 | 1248 | |
962cee9f | 1249 | my ($filename, $x, %h, $found) ; |
6ca2e664 | 1250 | |
07200f1b | 1251 | $filename = "tree" ; |
bbc7dcd2 | 1252 | |
6ca2e664 PM |
1253 | # Enable duplicate records |
1254 | $DB_BTREE->{'flags'} = R_DUP ; | |
bbc7dcd2 | 1255 | |
45a340cb | 1256 | $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE |
6ca2e664 PM |
1257 | or die "Cannot open $filename: $!\n"; |
1258 | ||
1259 | $x->del_dup("Wall", "Larry") ; | |
1260 | ||
1261 | $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ; | |
1262 | print "Larry Wall is $found there\n" ; | |
bbc7dcd2 | 1263 | |
6ca2e664 PM |
1264 | undef $x ; |
1265 | untie %h ; | |
1266 | ||
1267 | prints this | |
1268 | ||
1269 | Larry Wall is not there | |
1270 | ||
f6b705ef | 1271 | =head2 Matching Partial Keys |
1272 | ||
1273 | The BTREE interface has a feature which allows partial keys to be | |
1274 | matched. This functionality is I<only> available when the C<seq> method | |
1275 | is used along with the R_CURSOR flag. | |
1276 | ||
1277 | $x->seq($key, $value, R_CURSOR) ; | |
1278 | ||
1279 | Here is the relevant quote from the dbopen man page where it defines | |
1280 | the use of the R_CURSOR flag with seq: | |
1281 | ||
f6b705ef | 1282 | Note, for the DB_BTREE access method, the returned key is not |
1283 | necessarily an exact match for the specified key. The returned key | |
1284 | is the smallest key greater than or equal to the specified key, | |
1285 | permitting partial key matches and range searches. | |
1286 | ||
f6b705ef | 1287 | In the example script below, the C<match> sub uses this feature to find |
1288 | and print the first matching key/value pair given a partial key. | |
1289 | ||
3245f058 | 1290 | use warnings ; |
610ab055 | 1291 | use strict ; |
f6b705ef | 1292 | use DB_File ; |
1293 | use Fcntl ; | |
610ab055 | 1294 | |
962cee9f | 1295 | my ($filename, $x, %h, $st, $key, $value) ; |
f6b705ef | 1296 | |
1297 | sub match | |
1298 | { | |
1299 | my $key = shift ; | |
610ab055 | 1300 | my $value = 0; |
f6b705ef | 1301 | my $orig_key = $key ; |
1302 | $x->seq($key, $value, R_CURSOR) ; | |
1303 | print "$orig_key\t-> $key\t-> $value\n" ; | |
1304 | } | |
1305 | ||
1306 | $filename = "tree" ; | |
1307 | unlink $filename ; | |
1308 | ||
45a340cb | 1309 | $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE |
f6b705ef | 1310 | or die "Cannot open $filename: $!\n"; |
bbc7dcd2 | 1311 | |
f6b705ef | 1312 | # Add some key/value pairs to the file |
1313 | $h{'mouse'} = 'mickey' ; | |
1314 | $h{'Wall'} = 'Larry' ; | |
1315 | $h{'Walls'} = 'Brick' ; | |
1316 | $h{'Smith'} = 'John' ; | |
bbc7dcd2 | 1317 | |
f6b705ef | 1318 | |
610ab055 | 1319 | $key = $value = 0 ; |
f6b705ef | 1320 | print "IN ORDER\n" ; |
1321 | for ($st = $x->seq($key, $value, R_FIRST) ; | |
1322 | $st == 0 ; | |
1323 | $st = $x->seq($key, $value, R_NEXT) ) | |
bbc7dcd2 | 1324 | |
2c2d71f5 | 1325 | { print "$key -> $value\n" } |
bbc7dcd2 | 1326 | |
f6b705ef | 1327 | print "\nPARTIAL MATCH\n" ; |
1328 | ||
1329 | match "Wa" ; | |
1330 | match "A" ; | |
1331 | match "a" ; | |
1332 | ||
1333 | undef $x ; | |
1334 | untie %h ; | |
1335 | ||
1336 | Here is the output: | |
1337 | ||
1338 | IN ORDER | |
1339 | Smith -> John | |
1340 | Wall -> Larry | |
1341 | Walls -> Brick | |
1342 | mouse -> mickey | |
1343 | ||
1344 | PARTIAL MATCH | |
1345 | Wa -> Wall -> Larry | |
1346 | A -> Smith -> John | |
1347 | a -> mouse -> mickey | |
1348 | ||
1349 | =head1 DB_RECNO | |
1350 | ||
1351 | DB_RECNO provides an interface to flat text files. Both variable and | |
1352 | fixed length records are supported. | |
3b35bae3 | 1353 | |
6ca2e664 | 1354 | In order to make RECNO more compatible with Perl, the array offset for |
88108326 | 1355 | all RECNO arrays begins at 0 rather than 1 as in Berkeley DB. |
3b35bae3 | 1356 | |
88108326 | 1357 | As with normal Perl arrays, a RECNO array can be accessed using |
1358 | negative indexes. The index -1 refers to the last element of the array, | |
1359 | -2 the second last, and so on. Attempting to access an element before | |
1360 | the start of the array will raise a fatal run-time error. | |
3b35bae3 | 1361 | |
68dc0745 | 1362 | =head2 The 'bval' Option |
36477c24 | 1363 | |
1364 | The operation of the bval option warrants some discussion. Here is the | |
1365 | definition of bval from the Berkeley DB 1.85 recno manual page: | |
1366 | ||
1367 | The delimiting byte to be used to mark the end of a | |
1368 | record for variable-length records, and the pad charac- | |
1369 | ter for fixed-length records. If no value is speci- | |
1370 | fied, newlines (``\n'') are used to mark the end of | |
1371 | variable-length records and fixed-length records are | |
1372 | padded with spaces. | |
1373 | ||
1374 | The second sentence is wrong. In actual fact bval will only default to | |
1375 | C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL | |
1376 | openinfo parameter is used at all, the value that happens to be in bval | |
1377 | will be used. That means you always have to specify bval when making | |
1378 | use of any of the options in the openinfo parameter. This documentation | |
1379 | error will be fixed in the next release of Berkeley DB. | |
1380 | ||
1381 | That clarifies the situation with regards Berkeley DB itself. What | |
1382 | about B<DB_File>? Well, the behavior defined in the quote above is | |
6ca2e664 | 1383 | quite useful, so B<DB_File> conforms to it. |
36477c24 | 1384 | |
1385 | That means that you can specify other options (e.g. cachesize) and | |
1386 | still have bval default to C<"\n"> for variable length records, and | |
1387 | space for fixed length records. | |
1388 | ||
c5da4faf | 1389 | Also note that the bval option only allows you to specify a single byte |
a6d6498e | 1390 | as a delimiter. |
c5da4faf | 1391 | |
f6b705ef | 1392 | =head2 A Simple Example |
3b35bae3 | 1393 | |
6ca2e664 PM |
1394 | Here is a simple example that uses RECNO (if you are using a version |
1395 | of Perl earlier than 5.004_57 this example won't work -- see | |
1396 | L<Extra RECNO Methods> for a workaround). | |
f6b705ef | 1397 | |
3245f058 | 1398 | use warnings ; |
610ab055 | 1399 | use strict ; |
f6b705ef | 1400 | use DB_File ; |
f6b705ef | 1401 | |
2c2d71f5 JH |
1402 | my $filename = "text" ; |
1403 | unlink $filename ; | |
1404 | ||
610ab055 | 1405 | my @h ; |
45a340cb | 1406 | tie @h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_RECNO |
f6b705ef | 1407 | or die "Cannot open file 'text': $!\n" ; |
1408 | ||
1409 | # Add a few key/value pairs to the file | |
1410 | $h[0] = "orange" ; | |
1411 | $h[1] = "blue" ; | |
1412 | $h[2] = "yellow" ; | |
1413 | ||
6ca2e664 PM |
1414 | push @h, "green", "black" ; |
1415 | ||
1416 | my $elements = scalar @h ; | |
1417 | print "The array contains $elements entries\n" ; | |
1418 | ||
1419 | my $last = pop @h ; | |
1420 | print "popped $last\n" ; | |
1421 | ||
1422 | unshift @h, "white" ; | |
1423 | my $first = shift @h ; | |
1424 | print "shifted $first\n" ; | |
1425 | ||
f6b705ef | 1426 | # Check for existence of a key |
1427 | print "Element 1 Exists with value $h[1]\n" if $h[1] ; | |
1428 | ||
1429 | # use a negative index | |
1430 | print "The last element is $h[-1]\n" ; | |
1431 | print "The 2nd last element is $h[-2]\n" ; | |
1432 | ||
1433 | untie @h ; | |
3b35bae3 | 1434 | |
f6b705ef | 1435 | Here is the output from the script: |
1436 | ||
6ca2e664 PM |
1437 | The array contains 5 entries |
1438 | popped black | |
2c2d71f5 | 1439 | shifted white |
f6b705ef | 1440 | Element 1 Exists with value blue |
6ca2e664 PM |
1441 | The last element is green |
1442 | The 2nd last element is yellow | |
f6b705ef | 1443 | |
6ca2e664 | 1444 | =head2 Extra RECNO Methods |
f6b705ef | 1445 | |
045291aa | 1446 | If you are using a version of Perl earlier than 5.004_57, the tied |
6ca2e664 PM |
1447 | array interface is quite limited. In the example script above |
1448 | C<push>, C<pop>, C<shift>, C<unshift> | |
1449 | or determining the array length will not work with a tied array. | |
045291aa PM |
1450 | |
1451 | To make the interface more useful for older versions of Perl, a number | |
1452 | of methods are supplied with B<DB_File> to simulate the missing array | |
1453 | operations. All these methods are accessed via the object returned from | |
1454 | the tie call. | |
f6b705ef | 1455 | |
1456 | Here are the methods: | |
1457 | ||
1458 | =over 5 | |
3b35bae3 | 1459 | |
f6b705ef | 1460 | =item B<$X-E<gt>push(list) ;> |
1461 | ||
1462 | Pushes the elements of C<list> to the end of the array. | |
1463 | ||
1464 | =item B<$value = $X-E<gt>pop ;> | |
1465 | ||
1466 | Removes and returns the last element of the array. | |
1467 | ||
1468 | =item B<$X-E<gt>shift> | |
1469 | ||
1470 | Removes and returns the first element of the array. | |
1471 | ||
1472 | =item B<$X-E<gt>unshift(list) ;> | |
1473 | ||
1474 | Pushes the elements of C<list> to the start of the array. | |
1475 | ||
1476 | =item B<$X-E<gt>length> | |
1477 | ||
1478 | Returns the number of elements in the array. | |
1479 | ||
c5da4faf PM |
1480 | =item B<$X-E<gt>splice(offset, length, elements);> |
1481 | ||
a6d05634 | 1482 | Returns a splice of the array. |
c5da4faf | 1483 | |
f6b705ef | 1484 | =back |
1485 | ||
1486 | =head2 Another Example | |
1487 | ||
1488 | Here is a more complete example that makes use of some of the methods | |
1489 | described above. It also makes use of the API interface directly (see | |
1490 | L<THE API INTERFACE>). | |
1491 | ||
3245f058 | 1492 | use warnings ; |
f6b705ef | 1493 | use strict ; |
962cee9f | 1494 | my (@h, $H, $file, $i) ; |
f6b705ef | 1495 | use DB_File ; |
1496 | use Fcntl ; | |
bbc7dcd2 | 1497 | |
f6b705ef | 1498 | $file = "text" ; |
1499 | ||
1500 | unlink $file ; | |
1501 | ||
45a340cb | 1502 | $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0666, $DB_RECNO |
f6b705ef | 1503 | or die "Cannot open file $file: $!\n" ; |
bbc7dcd2 | 1504 | |
f6b705ef | 1505 | # first create a text file to play with |
1506 | $h[0] = "zero" ; | |
1507 | $h[1] = "one" ; | |
1508 | $h[2] = "two" ; | |
1509 | $h[3] = "three" ; | |
1510 | $h[4] = "four" ; | |
1511 | ||
bbc7dcd2 | 1512 | |
f6b705ef | 1513 | # Print the records in order. |
1514 | # | |
1515 | # The length method is needed here because evaluating a tied | |
1516 | # array in a scalar context does not return the number of | |
1517 | # elements in the array. | |
1518 | ||
1519 | print "\nORIGINAL\n" ; | |
1520 | foreach $i (0 .. $H->length - 1) { | |
1521 | print "$i: $h[$i]\n" ; | |
1522 | } | |
1523 | ||
1524 | # use the push & pop methods | |
1525 | $a = $H->pop ; | |
1526 | $H->push("last") ; | |
1527 | print "\nThe last record was [$a]\n" ; | |
1528 | ||
1529 | # and the shift & unshift methods | |
1530 | $a = $H->shift ; | |
1531 | $H->unshift("first") ; | |
1532 | print "The first record was [$a]\n" ; | |
1533 | ||
1534 | # Use the API to add a new record after record 2. | |
1535 | $i = 2 ; | |
1536 | $H->put($i, "Newbie", R_IAFTER) ; | |
1537 | ||
1538 | # and a new record before record 1. | |
1539 | $i = 1 ; | |
1540 | $H->put($i, "New One", R_IBEFORE) ; | |
1541 | ||
1542 | # delete record 3 | |
1543 | $H->del(3) ; | |
1544 | ||
1545 | # now print the records in reverse order | |
1546 | print "\nREVERSE\n" ; | |
1547 | for ($i = $H->length - 1 ; $i >= 0 ; -- $i) | |
1548 | { print "$i: $h[$i]\n" } | |
1549 | ||
1550 | # same again, but use the API functions instead | |
1551 | print "\nREVERSE again\n" ; | |
610ab055 | 1552 | my ($s, $k, $v) = (0, 0, 0) ; |
f6b705ef | 1553 | for ($s = $H->seq($k, $v, R_LAST) ; |
1554 | $s == 0 ; | |
1555 | $s = $H->seq($k, $v, R_PREV)) | |
1556 | { print "$k: $v\n" } | |
1557 | ||
1558 | undef $H ; | |
1559 | untie @h ; | |
1560 | ||
1561 | and this is what it outputs: | |
1562 | ||
1563 | ORIGINAL | |
1564 | 0: zero | |
1565 | 1: one | |
1566 | 2: two | |
1567 | 3: three | |
1568 | 4: four | |
1569 | ||
1570 | The last record was [four] | |
1571 | The first record was [zero] | |
1572 | ||
1573 | REVERSE | |
1574 | 5: last | |
1575 | 4: three | |
1576 | 3: Newbie | |
1577 | 2: one | |
1578 | 1: New One | |
1579 | 0: first | |
1580 | ||
1581 | REVERSE again | |
1582 | 5: last | |
1583 | 4: three | |
1584 | 3: Newbie | |
1585 | 2: one | |
1586 | 1: New One | |
1587 | 0: first | |
1588 | ||
1589 | Notes: | |
1590 | ||
1591 | =over 5 | |
1592 | ||
1593 | =item 1. | |
1594 | ||
1595 | Rather than iterating through the array, C<@h> like this: | |
1596 | ||
1597 | foreach $i (@h) | |
1598 | ||
1599 | it is necessary to use either this: | |
1600 | ||
1601 | foreach $i (0 .. $H->length - 1) | |
1602 | ||
1603 | or this: | |
1604 | ||
1605 | for ($a = $H->get($k, $v, R_FIRST) ; | |
1606 | $a == 0 ; | |
1607 | $a = $H->get($k, $v, R_NEXT) ) | |
1608 | ||
1609 | =item 2. | |
1610 | ||
1611 | Notice that both times the C<put> method was used the record index was | |
1612 | specified using a variable, C<$i>, rather than the literal value | |
1613 | itself. This is because C<put> will return the record number of the | |
1614 | inserted line via that parameter. | |
1615 | ||
1616 | =back | |
1617 | ||
1618 | =head1 THE API INTERFACE | |
3b35bae3 AD |
1619 | |
1620 | As well as accessing Berkeley DB using a tied hash or array, it is also | |
88108326 | 1621 | possible to make direct use of most of the API functions defined in the |
8e07c86e | 1622 | Berkeley DB documentation. |
3b35bae3 | 1623 | |
88108326 | 1624 | To do this you need to store a copy of the object returned from the tie. |
3b35bae3 | 1625 | |
88108326 | 1626 | $db = tie %hash, "DB_File", "filename" ; |
3b35bae3 | 1627 | |
8e07c86e | 1628 | Once you have done that, you can access the Berkeley DB API functions |
88108326 | 1629 | as B<DB_File> methods directly like this: |
3b35bae3 AD |
1630 | |
1631 | $db->put($key, $value, R_NOOVERWRITE) ; | |
1632 | ||
88108326 | 1633 | B<Important:> If you have saved a copy of the object returned from |
1634 | C<tie>, the underlying database file will I<not> be closed until both | |
1635 | the tied variable is untied and all copies of the saved object are | |
610ab055 | 1636 | destroyed. |
88108326 | 1637 | |
1638 | use DB_File ; | |
1639 | $db = tie %hash, "DB_File", "filename" | |
1640 | or die "Cannot tie filename: $!" ; | |
1641 | ... | |
1642 | undef $db ; | |
1643 | untie %hash ; | |
1644 | ||
9a2c4ce3 | 1645 | See L<The untie() Gotcha> for more details. |
778183f3 | 1646 | |
88108326 | 1647 | All the functions defined in L<dbopen> are available except for |
1648 | close() and dbopen() itself. The B<DB_File> method interface to the | |
1649 | supported functions have been implemented to mirror the way Berkeley DB | |
1650 | works whenever possible. In particular note that: | |
1651 | ||
1652 | =over 5 | |
1653 | ||
1654 | =item * | |
1655 | ||
1656 | The methods return a status value. All return 0 on success. | |
1657 | All return -1 to signify an error and set C<$!> to the exact | |
1658 | error code. The return code 1 generally (but not always) means that the | |
1659 | key specified did not exist in the database. | |
1660 | ||
1661 | Other return codes are defined. See below and in the Berkeley DB | |
1662 | documentation for details. The Berkeley DB documentation should be used | |
1663 | as the definitive source. | |
1664 | ||
1665 | =item * | |
3b35bae3 | 1666 | |
88108326 | 1667 | Whenever a Berkeley DB function returns data via one of its parameters, |
1668 | the equivalent B<DB_File> method does exactly the same. | |
3b35bae3 | 1669 | |
88108326 | 1670 | =item * |
1671 | ||
1672 | If you are careful, it is possible to mix API calls with the tied | |
1673 | hash/array interface in the same piece of code. Although only a few of | |
1674 | the methods used to implement the tied interface currently make use of | |
1675 | the cursor, you should always assume that the cursor has been changed | |
1676 | any time the tied hash/array interface is used. As an example, this | |
1677 | code will probably not do what you expect: | |
1678 | ||
1679 | $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE | |
1680 | or die "Cannot tie $filename: $!" ; | |
1681 | ||
1682 | # Get the first key/value pair and set the cursor | |
1683 | $X->seq($key, $value, R_FIRST) ; | |
1684 | ||
1685 | # this line will modify the cursor | |
1686 | $count = scalar keys %x ; | |
1687 | ||
1688 | # Get the second key/value pair. | |
1689 | # oops, it didn't, it got the last key/value pair! | |
1690 | $X->seq($key, $value, R_NEXT) ; | |
1691 | ||
1692 | The code above can be rearranged to get around the problem, like this: | |
1693 | ||
1694 | $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE | |
1695 | or die "Cannot tie $filename: $!" ; | |
1696 | ||
1697 | # this line will modify the cursor | |
1698 | $count = scalar keys %x ; | |
1699 | ||
1700 | # Get the first key/value pair and set the cursor | |
1701 | $X->seq($key, $value, R_FIRST) ; | |
1702 | ||
1703 | # Get the second key/value pair. | |
1704 | # worked this time. | |
1705 | $X->seq($key, $value, R_NEXT) ; | |
1706 | ||
1707 | =back | |
1708 | ||
1709 | All the constants defined in L<dbopen> for use in the flags parameters | |
1710 | in the methods defined below are also available. Refer to the Berkeley | |
1711 | DB documentation for the precise meaning of the flags values. | |
1712 | ||
1713 | Below is a list of the methods available. | |
3b35bae3 AD |
1714 | |
1715 | =over 5 | |
1716 | ||
f6b705ef | 1717 | =item B<$status = $X-E<gt>get($key, $value [, $flags]) ;> |
88108326 | 1718 | |
1719 | Given a key (C<$key>) this method reads the value associated with it | |
1720 | from the database. The value read from the database is returned in the | |
1721 | C<$value> parameter. | |
3b35bae3 | 1722 | |
88108326 | 1723 | If the key does not exist the method returns 1. |
3b35bae3 | 1724 | |
88108326 | 1725 | No flags are currently defined for this method. |
3b35bae3 | 1726 | |
f6b705ef | 1727 | =item B<$status = $X-E<gt>put($key, $value [, $flags]) ;> |
3b35bae3 | 1728 | |
88108326 | 1729 | Stores the key/value pair in the database. |
1730 | ||
1731 | If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter | |
8e07c86e | 1732 | will have the record number of the inserted key/value pair set. |
3b35bae3 | 1733 | |
88108326 | 1734 | Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and |
1735 | R_SETCURSOR. | |
1736 | ||
f6b705ef | 1737 | =item B<$status = $X-E<gt>del($key [, $flags]) ;> |
3b35bae3 | 1738 | |
88108326 | 1739 | Removes all key/value pairs with key C<$key> from the database. |
3b35bae3 | 1740 | |
88108326 | 1741 | A return code of 1 means that the requested key was not in the |
1742 | database. | |
3b35bae3 | 1743 | |
88108326 | 1744 | R_CURSOR is the only valid flag at present. |
3b35bae3 | 1745 | |
f6b705ef | 1746 | =item B<$status = $X-E<gt>fd ;> |
3b35bae3 | 1747 | |
88108326 | 1748 | Returns the file descriptor for the underlying database. |
3b35bae3 | 1749 | |
b90e71be GS |
1750 | See L<Locking: The Trouble with fd> for an explanation for why you should |
1751 | not use C<fd> to lock your database. | |
3b35bae3 | 1752 | |
f6b705ef | 1753 | =item B<$status = $X-E<gt>seq($key, $value, $flags) ;> |
3b35bae3 | 1754 | |
88108326 | 1755 | This interface allows sequential retrieval from the database. See |
1756 | L<dbopen> for full details. | |
1757 | ||
1758 | Both the C<$key> and C<$value> parameters will be set to the key/value | |
1759 | pair read from the database. | |
1760 | ||
1761 | The flags parameter is mandatory. The valid flag values are R_CURSOR, | |
1762 | R_FIRST, R_LAST, R_NEXT and R_PREV. | |
1763 | ||
f6b705ef | 1764 | =item B<$status = $X-E<gt>sync([$flags]) ;> |
88108326 | 1765 | |
1766 | Flushes any cached buffers to disk. | |
1767 | ||
1768 | R_RECNOSYNC is the only valid flag at present. | |
3b35bae3 AD |
1769 | |
1770 | =back | |
1771 | ||
cad2e5aa JH |
1772 | =head1 DBM FILTERS |
1773 | ||
1774 | A DBM Filter is a piece of code that is be used when you I<always> | |
1775 | want to make the same transformation to all keys and/or values in a | |
1776 | DBM database. | |
1777 | ||
1778 | There are four methods associated with DBM Filters. All work identically, | |
1779 | and each is used to install (or uninstall) a single DBM Filter. Each | |
1780 | expects a single parameter, namely a reference to a sub. The only | |
1781 | difference between them is the place that the filter is installed. | |
1782 | ||
1783 | To summarise: | |
1784 | ||
1785 | =over 5 | |
1786 | ||
1787 | =item B<filter_store_key> | |
1788 | ||
1789 | If a filter has been installed with this method, it will be invoked | |
1790 | every time you write a key to a DBM database. | |
1791 | ||
1792 | =item B<filter_store_value> | |
1793 | ||
1794 | If a filter has been installed with this method, it will be invoked | |
1795 | every time you write a value to a DBM database. | |
1796 | ||
1797 | ||
1798 | =item B<filter_fetch_key> | |
1799 | ||
1800 | If a filter has been installed with this method, it will be invoked | |
1801 | every time you read a key from a DBM database. | |
1802 | ||
1803 | =item B<filter_fetch_value> | |
1804 | ||
1805 | If a filter has been installed with this method, it will be invoked | |
1806 | every time you read a value from a DBM database. | |
1807 | ||
1808 | =back | |
1809 | ||
1810 | You can use any combination of the methods, from none, to all four. | |
1811 | ||
1812 | All filter methods return the existing filter, if present, or C<undef> | |
1813 | in not. | |
1814 | ||
1815 | To delete a filter pass C<undef> to it. | |
1816 | ||
1817 | =head2 The Filter | |
1818 | ||
1819 | When each filter is called by Perl, a local copy of C<$_> will contain | |
1820 | the key or value to be filtered. Filtering is achieved by modifying | |
1821 | the contents of C<$_>. The return code from the filter is ignored. | |
1822 | ||
1823 | =head2 An Example -- the NULL termination problem. | |
1824 | ||
1825 | Consider the following scenario. You have a DBM database | |
1826 | that you need to share with a third-party C application. The C application | |
1827 | assumes that I<all> keys and values are NULL terminated. Unfortunately | |
1828 | when Perl writes to DBM databases it doesn't use NULL termination, so | |
1829 | your Perl application will have to manage NULL termination itself. When | |
1830 | you write to the database you will have to use something like this: | |
1831 | ||
1832 | $hash{"$key\0"} = "$value\0" ; | |
1833 | ||
1834 | Similarly the NULL needs to be taken into account when you are considering | |
1835 | the length of existing keys/values. | |
1836 | ||
1837 | It would be much better if you could ignore the NULL terminations issue | |
1838 | in the main application code and have a mechanism that automatically | |
1839 | added the terminating NULL to all keys and values whenever you write to | |
1840 | the database and have them removed when you read from the database. As I'm | |
1841 | sure you have already guessed, this is a problem that DBM Filters can | |
1842 | fix very easily. | |
1843 | ||
3245f058 | 1844 | use warnings ; |
cad2e5aa JH |
1845 | use strict ; |
1846 | use DB_File ; | |
1847 | ||
1848 | my %hash ; | |
2359510d | 1849 | my $filename = "filt" ; |
cad2e5aa JH |
1850 | unlink $filename ; |
1851 | ||
1852 | my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH | |
1853 | or die "Cannot open $filename: $!\n" ; | |
1854 | ||
1855 | # Install DBM Filters | |
1856 | $db->filter_fetch_key ( sub { s/\0$// } ) ; | |
1857 | $db->filter_store_key ( sub { $_ .= "\0" } ) ; | |
1858 | $db->filter_fetch_value( sub { s/\0$// } ) ; | |
1859 | $db->filter_store_value( sub { $_ .= "\0" } ) ; | |
1860 | ||
1861 | $hash{"abc"} = "def" ; | |
1862 | my $a = $hash{"ABC"} ; | |
1863 | # ... | |
1864 | undef $db ; | |
1865 | untie %hash ; | |
1866 | ||
1867 | Hopefully the contents of each of the filters should be | |
1868 | self-explanatory. Both "fetch" filters remove the terminating NULL, | |
1869 | and both "store" filters add a terminating NULL. | |
1870 | ||
1871 | ||
1872 | =head2 Another Example -- Key is a C int. | |
1873 | ||
1874 | Here is another real-life example. By default, whenever Perl writes to | |
1875 | a DBM database it always writes the key and value as strings. So when | |
1876 | you use this: | |
1877 | ||
3c4b39be | 1878 | $hash{12345} = "something" ; |
cad2e5aa JH |
1879 | |
1880 | the key 12345 will get stored in the DBM database as the 5 byte string | |
1881 | "12345". If you actually want the key to be stored in the DBM database | |
1882 | as a C int, you will have to use C<pack> when writing, and C<unpack> | |
1883 | when reading. | |
1884 | ||
1885 | Here is a DBM Filter that does it: | |
1886 | ||
3245f058 | 1887 | use warnings ; |
cad2e5aa JH |
1888 | use strict ; |
1889 | use DB_File ; | |
1890 | my %hash ; | |
2359510d | 1891 | my $filename = "filt" ; |
cad2e5aa JH |
1892 | unlink $filename ; |
1893 | ||
1894 | ||
1895 | my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH | |
1896 | or die "Cannot open $filename: $!\n" ; | |
1897 | ||
1898 | $db->filter_fetch_key ( sub { $_ = unpack("i", $_) } ) ; | |
1899 | $db->filter_store_key ( sub { $_ = pack ("i", $_) } ) ; | |
1900 | $hash{123} = "def" ; | |
1901 | # ... | |
1902 | undef $db ; | |
1903 | untie %hash ; | |
1904 | ||
1905 | This time only two filters have been used -- we only need to manipulate | |
1906 | the contents of the key, so it wasn't necessary to install any value | |
1907 | filters. | |
1908 | ||
f6b705ef | 1909 | =head1 HINTS AND TIPS |
3b35bae3 | 1910 | |
3b35bae3 | 1911 | |
b90e71be | 1912 | =head2 Locking: The Trouble with fd |
3b35bae3 | 1913 | |
b90e71be GS |
1914 | Until version 1.72 of this module, the recommended technique for locking |
1915 | B<DB_File> databases was to flock the filehandle returned from the "fd" | |
1916 | function. Unfortunately this technique has been shown to be fundamentally | |
1917 | flawed (Kudos to David Harris for tracking this down). Use it at your own | |
1918 | peril! | |
3b35bae3 | 1919 | |
b90e71be | 1920 | The locking technique went like this. |
cb1a09d0 | 1921 | |
2359510d SD |
1922 | $db = tie(%db, 'DB_File', 'foo.db', O_CREAT|O_RDWR, 0644) |
1923 | || die "dbcreat foo.db $!"; | |
b90e71be GS |
1924 | $fd = $db->fd; |
1925 | open(DB_FH, "+<&=$fd") || die "dup $!"; | |
1926 | flock (DB_FH, LOCK_EX) || die "flock: $!"; | |
1927 | ... | |
1928 | $db{"Tom"} = "Jerry" ; | |
1929 | ... | |
1930 | flock(DB_FH, LOCK_UN); | |
1931 | undef $db; | |
1932 | untie %db; | |
1933 | close(DB_FH); | |
cb1a09d0 | 1934 | |
b90e71be | 1935 | In simple terms, this is what happens: |
cb1a09d0 | 1936 | |
b90e71be | 1937 | =over 5 |
cb1a09d0 | 1938 | |
b90e71be | 1939 | =item 1. |
cb1a09d0 | 1940 | |
b90e71be | 1941 | Use "tie" to open the database. |
cb1a09d0 | 1942 | |
b90e71be | 1943 | =item 2. |
cb1a09d0 | 1944 | |
b90e71be | 1945 | Lock the database with fd & flock. |
cb1a09d0 | 1946 | |
b90e71be | 1947 | =item 3. |
cb1a09d0 | 1948 | |
b90e71be | 1949 | Read & Write to the database. |
cb1a09d0 | 1950 | |
b90e71be | 1951 | =item 4. |
cb1a09d0 | 1952 | |
b90e71be | 1953 | Unlock and close the database. |
cb1a09d0 | 1954 | |
b90e71be GS |
1955 | =back |
1956 | ||
1957 | Here is the crux of the problem. A side-effect of opening the B<DB_File> | |
1958 | database in step 2 is that an initial block from the database will get | |
1959 | read from disk and cached in memory. | |
1960 | ||
1961 | To see why this is a problem, consider what can happen when two processes, | |
1962 | say "A" and "B", both want to update the same B<DB_File> database | |
1963 | using the locking steps outlined above. Assume process "A" has already | |
1964 | opened the database and has a write lock, but it hasn't actually updated | |
1965 | the database yet (it has finished step 2, but not started step 3 yet). Now | |
1966 | process "B" tries to open the same database - step 1 will succeed, | |
1967 | but it will block on step 2 until process "A" releases the lock. The | |
1968 | important thing to notice here is that at this point in time both | |
1969 | processes will have cached identical initial blocks from the database. | |
1970 | ||
1971 | Now process "A" updates the database and happens to change some of the | |
1972 | data held in the initial buffer. Process "A" terminates, flushing | |
1973 | all cached data to disk and releasing the database lock. At this point | |
1974 | the database on disk will correctly reflect the changes made by process | |
1975 | "A". | |
1976 | ||
1977 | With the lock released, process "B" can now continue. It also updates the | |
1978 | database and unfortunately it too modifies the data that was in its | |
1979 | initial buffer. Once that data gets flushed to disk it will overwrite | |
1980 | some/all of the changes process "A" made to the database. | |
1981 | ||
1982 | The result of this scenario is at best a database that doesn't contain | |
1983 | what you expect. At worst the database will corrupt. | |
1984 | ||
1985 | The above won't happen every time competing process update the same | |
1986 | B<DB_File> database, but it does illustrate why the technique should | |
1987 | not be used. | |
1988 | ||
1989 | =head2 Safe ways to lock a database | |
1990 | ||
1991 | Starting with version 2.x, Berkeley DB has internal support for locking. | |
1992 | The companion module to this one, B<BerkeleyDB>, provides an interface | |
1993 | to this locking functionality. If you are serious about locking | |
1994 | Berkeley DB databases, I strongly recommend using B<BerkeleyDB>. | |
1995 | ||
1996 | If using B<BerkeleyDB> isn't an option, there are a number of modules | |
1997 | available on CPAN that can be used to implement locking. Each one | |
1998 | implements locking differently and has different goals in mind. It is | |
1999 | therefore worth knowing the difference, so that you can pick the right | |
2000 | one for your application. Here are the three locking wrappers: | |
2001 | ||
2002 | =over 5 | |
2003 | ||
2004 | =item B<Tie::DB_Lock> | |
2005 | ||
2006 | A B<DB_File> wrapper which creates copies of the database file for | |
2007 | read access, so that you have a kind of a multiversioning concurrent read | |
2008 | system. However, updates are still serial. Use for databases where reads | |
2009 | may be lengthy and consistency problems may occur. | |
2010 | ||
2011 | =item B<Tie::DB_LockFile> | |
2012 | ||
2013 | A B<DB_File> wrapper that has the ability to lock and unlock the database | |
2014 | while it is being used. Avoids the tie-before-flock problem by simply | |
2015 | re-tie-ing the database when you get or drop a lock. Because of the | |
2016 | flexibility in dropping and re-acquiring the lock in the middle of a | |
2017 | session, this can be massaged into a system that will work with long | |
2018 | updates and/or reads if the application follows the hints in the POD | |
2019 | documentation. | |
2020 | ||
2021 | =item B<DB_File::Lock> | |
2022 | ||
2023 | An extremely lightweight B<DB_File> wrapper that simply flocks a lockfile | |
2024 | before tie-ing the database and drops the lock after the untie. Allows | |
2025 | one to use the same lockfile for multiple databases to avoid deadlock | |
2026 | problems, if desired. Use for databases where updates are reads are | |
2027 | quick and simple flock locking semantics are enough. | |
2028 | ||
2029 | =back | |
cb1a09d0 | 2030 | |
68dc0745 | 2031 | =head2 Sharing Databases With C Applications |
f6b705ef | 2032 | |
2033 | There is no technical reason why a Berkeley DB database cannot be | |
2034 | shared by both a Perl and a C application. | |
2035 | ||
2036 | The vast majority of problems that are reported in this area boil down | |
2037 | to the fact that C strings are NULL terminated, whilst Perl strings are | |
cad2e5aa | 2038 | not. See L<DBM FILTERS> for a generic way to work around this problem. |
f6b705ef | 2039 | |
2040 | Here is a real example. Netscape 2.0 keeps a record of the locations you | |
2041 | visit along with the time you last visited them in a DB_HASH database. | |
2042 | This is usually stored in the file F<~/.netscape/history.db>. The key | |
2043 | field in the database is the location string and the value field is the | |
2044 | time the location was last visited stored as a 4 byte binary value. | |
2045 | ||
2046 | If you haven't already guessed, the location string is stored with a | |
2047 | terminating NULL. This means you need to be careful when accessing the | |
2048 | database. | |
2049 | ||
2050 | Here is a snippet of code that is loosely based on Tom Christiansen's | |
2051 | I<ggh> script (available from your nearest CPAN archive in | |
2052 | F<authors/id/TOMC/scripts/nshist.gz>). | |
2053 | ||
3245f058 | 2054 | use warnings ; |
610ab055 | 2055 | use strict ; |
f6b705ef | 2056 | use DB_File ; |
2057 | use Fcntl ; | |
f6b705ef | 2058 | |
962cee9f | 2059 | my ($dotdir, $HISTORY, %hist_db, $href, $binary_time, $date) ; |
f6b705ef | 2060 | $dotdir = $ENV{HOME} || $ENV{LOGNAME}; |
2061 | ||
2062 | $HISTORY = "$dotdir/.netscape/history.db"; | |
2063 | ||
2064 | tie %hist_db, 'DB_File', $HISTORY | |
2065 | or die "Cannot open $HISTORY: $!\n" ;; | |
2066 | ||
2067 | # Dump the complete database | |
2068 | while ( ($href, $binary_time) = each %hist_db ) { | |
2069 | ||
2070 | # remove the terminating NULL | |
2071 | $href =~ s/\x00$// ; | |
2072 | ||
2073 | # convert the binary time into a user friendly string | |
2074 | $date = localtime unpack("V", $binary_time); | |
2075 | print "$date $href\n" ; | |
2076 | } | |
2077 | ||
2078 | # check for the existence of a specific key | |
2079 | # remember to add the NULL | |
2080 | if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) { | |
2081 | $date = localtime unpack("V", $binary_time) ; | |
2082 | print "Last visited mox.perl.com on $date\n" ; | |
2083 | } | |
2084 | else { | |
2085 | print "Never visited mox.perl.com\n" | |
2086 | } | |
2087 | ||
2088 | untie %hist_db ; | |
2089 | ||
68dc0745 | 2090 | =head2 The untie() Gotcha |
778183f3 | 2091 | |
7a2e2cd6 | 2092 | If you make use of the Berkeley DB API, it is I<very> strongly |
68dc0745 | 2093 | recommended that you read L<perltie/The untie Gotcha>. |
778183f3 PM |
2094 | |
2095 | Even if you don't currently make use of the API interface, it is still | |
2096 | worth reading it. | |
2097 | ||
2098 | Here is an example which illustrates the problem from a B<DB_File> | |
2099 | perspective: | |
2100 | ||
2101 | use DB_File ; | |
2102 | use Fcntl ; | |
2103 | ||
2104 | my %x ; | |
2105 | my $X ; | |
2106 | ||
2107 | $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC | |
2108 | or die "Cannot tie first time: $!" ; | |
2109 | ||
2110 | $x{123} = 456 ; | |
2111 | ||
2112 | untie %x ; | |
2113 | ||
2114 | tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT | |
2115 | or die "Cannot tie second time: $!" ; | |
2116 | ||
2117 | untie %x ; | |
2118 | ||
2119 | When run, the script will produce this error message: | |
2120 | ||
2121 | Cannot tie second time: Invalid argument at bad.file line 14. | |
2122 | ||
2123 | Although the error message above refers to the second tie() statement | |
2124 | in the script, the source of the problem is really with the untie() | |
2125 | statement that precedes it. | |
2126 | ||
2127 | Having read L<perltie> you will probably have already guessed that the | |
2128 | error is caused by the extra copy of the tied object stored in C<$X>. | |
2129 | If you haven't, then the problem boils down to the fact that the | |
2130 | B<DB_File> destructor, DESTROY, will not be called until I<all> | |
2131 | references to the tied object are destroyed. Both the tied variable, | |
2132 | C<%x>, and C<$X> above hold a reference to the object. The call to | |
2133 | untie() will destroy the first, but C<$X> still holds a valid | |
2134 | reference, so the destructor will not get called and the database file | |
2135 | F<tst.fil> will remain open. The fact that Berkeley DB then reports the | |
b90e71be | 2136 | attempt to open a database that is already open via the catch-all |
778183f3 PM |
2137 | "Invalid argument" doesn't help. |
2138 | ||
2139 | If you run the script with the C<-w> flag the error message becomes: | |
2140 | ||
2141 | untie attempted while 1 inner references still exist at bad.file line 12. | |
2142 | Cannot tie second time: Invalid argument at bad.file line 14. | |
2143 | ||
2144 | which pinpoints the real problem. Finally the script can now be | |
2145 | modified to fix the original problem by destroying the API object | |
2146 | before the untie: | |
2147 | ||
2148 | ... | |
2149 | $x{123} = 456 ; | |
2150 | ||
2151 | undef $X ; | |
2152 | untie %x ; | |
2153 | ||
2154 | $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT | |
2155 | ... | |
2156 | ||
f6b705ef | 2157 | |
2158 | =head1 COMMON QUESTIONS | |
2159 | ||
2160 | =head2 Why is there Perl source in my database? | |
2161 | ||
2162 | If you look at the contents of a database file created by DB_File, | |
2163 | there can sometimes be part of a Perl script included in it. | |
2164 | ||
2165 | This happens because Berkeley DB uses dynamic memory to allocate | |
2166 | buffers which will subsequently be written to the database file. Being | |
2167 | dynamic, the memory could have been used for anything before DB | |
2168 | malloced it. As Berkeley DB doesn't clear the memory once it has been | |
2169 | allocated, the unused portions will contain random junk. In the case | |
2170 | where a Perl script gets written to the database, the random junk will | |
2171 | correspond to an area of dynamic memory that happened to be used during | |
2172 | the compilation of the script. | |
2173 | ||
2174 | Unless you don't like the possibility of there being part of your Perl | |
2175 | scripts embedded in a database file, this is nothing to worry about. | |
2176 | ||
2177 | =head2 How do I store complex data structures with DB_File? | |
2178 | ||
2179 | Although B<DB_File> cannot do this directly, there is a module which | |
2180 | can layer transparently over B<DB_File> to accomplish this feat. | |
2181 | ||
2182 | Check out the MLDBM module, available on CPAN in the directory | |
2183 | F<modules/by-module/MLDBM>. | |
2184 | ||
2185 | =head2 What does "Invalid Argument" mean? | |
2186 | ||
2187 | You will get this error message when one of the parameters in the | |
2188 | C<tie> call is wrong. Unfortunately there are quite a few parameters to | |
2189 | get wrong, so it can be difficult to figure out which one it is. | |
2190 | ||
2191 | Here are a couple of possibilities: | |
2192 | ||
2193 | =over 5 | |
2194 | ||
2195 | =item 1. | |
2196 | ||
610ab055 | 2197 | Attempting to reopen a database without closing it. |
f6b705ef | 2198 | |
2199 | =item 2. | |
2200 | ||
2201 | Using the O_WRONLY flag. | |
2202 | ||
2203 | =back | |
2204 | ||
2205 | =head2 What does "Bareword 'DB_File' not allowed" mean? | |
2206 | ||
2207 | You will encounter this particular error message when you have the | |
2208 | C<strict 'subs'> pragma (or the full strict pragma) in your script. | |
2209 | Consider this script: | |
2210 | ||
3245f058 | 2211 | use warnings ; |
f6b705ef | 2212 | use strict ; |
2213 | use DB_File ; | |
07200f1b | 2214 | my %x ; |
f6b705ef | 2215 | tie %x, DB_File, "filename" ; |
2216 | ||
2217 | Running it produces the error in question: | |
2218 | ||
2219 | Bareword "DB_File" not allowed while "strict subs" in use | |
2220 | ||
2221 | To get around the error, place the word C<DB_File> in either single or | |
2222 | double quotes, like this: | |
2223 | ||
2224 | tie %x, "DB_File", "filename" ; | |
2225 | ||
2226 | Although it might seem like a real pain, it is really worth the effort | |
2227 | of having a C<use strict> in all your scripts. | |
2228 | ||
cad2e5aa JH |
2229 | =head1 REFERENCES |
2230 | ||
2231 | Articles that are either about B<DB_File> or make use of it. | |
2232 | ||
2233 | =over 5 | |
2234 | ||
2235 | =item 1. | |
2236 | ||
2237 | I<Full-Text Searching in Perl>, Tim Kientzle (tkientzle@ddj.com), | |
2238 | Dr. Dobb's Journal, Issue 295, January 1999, pp 34-41 | |
2239 | ||
2240 | =back | |
2241 | ||
cb1a09d0 AD |
2242 | =head1 HISTORY |
2243 | ||
1f70e1ea | 2244 | Moved to the Changes file. |
610ab055 | 2245 | |
1f70e1ea | 2246 | =head1 BUGS |
05475680 | 2247 | |
1f70e1ea PM |
2248 | Some older versions of Berkeley DB had problems with fixed length |
2249 | records using the RECNO file format. This problem has been fixed since | |
2250 | version 1.85 of Berkeley DB. | |
e858de61 | 2251 | |
1f70e1ea PM |
2252 | I am sure there are bugs in the code. If you do find any, or can |
2253 | suggest any enhancements, I would welcome your comments. | |
a6ed719b | 2254 | |
1f70e1ea | 2255 | =head1 AVAILABILITY |
a6ed719b | 2256 | |
1f70e1ea PM |
2257 | B<DB_File> comes with the standard Perl source distribution. Look in |
2258 | the directory F<ext/DB_File>. Given the amount of time between releases | |
2259 | of Perl the version that ships with Perl is quite likely to be out of | |
2260 | date, so the most recent version can always be found on CPAN (see | |
5bbd4290 | 2261 | L<perlmodlib/CPAN> for details), in the directory |
1f70e1ea | 2262 | F<modules/by-module/DB_File>. |
a6ed719b | 2263 | |
039d031f PM |
2264 | This version of B<DB_File> will work with either version 1.x, 2.x or |
2265 | 3.x of Berkeley DB, but is limited to the functionality provided by | |
2266 | version 1. | |
a6ed719b | 2267 | |
10261a36 | 2268 | The official web site for Berkeley DB is F<http://www.oracle.com/technology/products/berkeley-db/db/index.html>. |
039d031f | 2269 | All versions of Berkeley DB are available there. |
93af7a87 | 2270 | |
1f70e1ea PM |
2271 | Alternatively, Berkeley DB version 1 is available at your nearest CPAN |
2272 | archive in F<src/misc/db.1.85.tar.gz>. | |
e858de61 | 2273 | |
1f70e1ea | 2274 | =head1 COPYRIGHT |
3b35bae3 | 2275 | |
acb29889 | 2276 | Copyright (c) 1995-2012 Paul Marquess. All rights reserved. This program |
a9fd575d PM |
2277 | is free software; you can redistribute it and/or modify it under the |
2278 | same terms as Perl itself. | |
3b35bae3 | 2279 | |
1f70e1ea PM |
2280 | Although B<DB_File> is covered by the Perl license, the library it |
2281 | makes use of, namely Berkeley DB, is not. Berkeley DB has its own | |
2282 | copyright and its own license. Please take the time to read it. | |
3b35bae3 | 2283 | |
10af739e | 2284 | Here are a few words taken from the Berkeley DB FAQ (at |
10261a36 | 2285 | F<http://www.oracle.com/technology/products/berkeley-db/db/index.html>) regarding the license: |
68dc0745 | 2286 | |
a9fd575d | 2287 | Do I have to license DB to use it in Perl scripts? |
3b35bae3 | 2288 | |
a9fd575d PM |
2289 | No. The Berkeley DB license requires that software that uses |
2290 | Berkeley DB be freely redistributable. In the case of Perl, that | |
2291 | software is Perl, and not your scripts. Any Perl scripts that you | |
2292 | write are your property, including scripts that make use of | |
2293 | Berkeley DB. Neither the Perl license nor the Berkeley DB license | |
2294 | place any restriction on what you may do with them. | |
88108326 | 2295 | |
1f70e1ea PM |
2296 | If you are in any doubt about the license situation, contact either the |
2297 | Berkeley DB authors or the author of DB_File. See L<"AUTHOR"> for details. | |
a0b8c8c1 PM |
2298 | |
2299 | ||
3b35bae3 AD |
2300 | =head1 SEE ALSO |
2301 | ||
5bbd4290 PM |
2302 | L<perl>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>, |
2303 | L<perldbmfilter> | |
3b35bae3 | 2304 | |
3b35bae3 AD |
2305 | =head1 AUTHOR |
2306 | ||
8e07c86e | 2307 | The DB_File interface was written by Paul Marquess |
5bbd4290 | 2308 | E<lt>pmqs@cpan.orgE<gt>. |
3b35bae3 AD |
2309 | |
2310 | =cut |