perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	# DB_File.pm -- Perl 5 interface to Berkeley DB
	2	#
	3	# written by Paul Marquess (Paul.Marquess@btinternet.com)
	4	# last modified 17th December 2000
	5	# version 1.75
	6	#
	7	# Copyright (c) 1995-2000 Paul Marquess. All rights reserved.
	8	# This program is free software; you can redistribute it and/or
	9	# modify it under the same terms as Perl itself.
	10
	11
	12	package DB_File::HASHINFO ;
	13
	14	require 5.003 ;
	15
	16	use warnings;
	17	use strict;
	18	use Carp;
	19	require Tie::Hash;
	20	@DB_File::HASHINFO::ISA = qw(Tie::Hash);
	21
	22	sub new
	23	{
	24	my $pkg = shift ;
	25	my %x ;
	26	tie %x, $pkg ;
	27	bless \%x, $pkg ;
	28	}
	29
	30
	31	sub TIEHASH
	32	{
	33	my $pkg = shift ;
	34
	35	bless { VALID => { map {$_, 1}
	36	qw( bsize ffactor nelem cachesize hash lorder)
	37	},
	38	GOT => {}
	39	}, $pkg ;
	40	}
	41
	42
	43	sub FETCH
	44	{
	45	my $self = shift ;
	46	my $key = shift ;
	47
	48	return $self->{GOT}{$key} if exists $self->{VALID}{$key} ;
	49
	50	my $pkg = ref $self ;
	51	croak "${pkg}::FETCH - Unknown element '$key'" ;
	52	}
	53
	54
	55	sub STORE
	56	{
	57	my $self = shift ;
	58	my $key = shift ;
	59	my $value = shift ;
	60
	61	if ( exists $self->{VALID}{$key} )
	62	{
	63	$self->{GOT}{$key} = $value ;
	64	return ;
	65	}
	66
	67	my $pkg = ref $self ;
	68	croak "${pkg}::STORE - Unknown element '$key'" ;
	69	}
	70
	71	sub DELETE
	72	{
	73	my $self = shift ;
	74	my $key = shift ;
	75
	76	if ( exists $self->{VALID}{$key} )
	77	{
	78	delete $self->{GOT}{$key} ;
	79	return ;
	80	}
	81
	82	my $pkg = ref $self ;
	83	croak "DB_File::HASHINFO::DELETE - Unknown element '$key'" ;
	84	}
	85
	86	sub EXISTS
	87	{
	88	my $self = shift ;
	89	my $key = shift ;
	90
	91	exists $self->{VALID}{$key} ;
	92	}
	93
	94	sub NotHere
	95	{
	96	my $self = shift ;
	97	my $method = shift ;
	98
	99	croak ref($self) . " does not define the method ${method}" ;
	100	}
	101
	102	sub FIRSTKEY { my $self = shift ; $self->NotHere("FIRSTKEY") }
	103	sub NEXTKEY { my $self = shift ; $self->NotHere("NEXTKEY") }
	104	sub CLEAR { my $self = shift ; $self->NotHere("CLEAR") }
	105
	106	package DB_File::RECNOINFO ;
	107
	108	use warnings;
	109	use strict ;
	110
	111	@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ;
	112
	113	sub TIEHASH
	114	{
	115	my $pkg = shift ;
	116
	117	bless { VALID => { map {$_, 1}
	118	qw( bval cachesize psize flags lorder reclen bfname )
	119	},
	120	GOT => {},
	121	}, $pkg ;
	122	}
	123
	124	package DB_File::BTREEINFO ;
	125
	126	use warnings;
	127	use strict ;
	128
	129	@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ;
	130
	131	sub TIEHASH
	132	{
	133	my $pkg = shift ;
	134
	135	bless { VALID => { map {$_, 1}
	136	qw( flags cachesize maxkeypage minkeypage psize
	137	compare prefix lorder )
	138	},
	139	GOT => {},
	140	}, $pkg ;
	141	}
	142
	143
	144	package DB_File ;
	145
	146	use warnings;
	147	use strict;
	148	use vars qw($VERSION @ISA @EXPORT $AUTOLOAD $DB_BTREE $DB_HASH $DB_RECNO
	149	$db_version $use_XSLoader
	150	) ;
	151	use Carp;
	152
	153
	154	$VERSION = "1.75" ;
	155
	156	#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
	157	$DB_BTREE = new DB_File::BTREEINFO ;
	158	$DB_HASH = new DB_File::HASHINFO ;
	159	$DB_RECNO = new DB_File::RECNOINFO ;
	160
	161	require Tie::Hash;
	162	require Exporter;
	163	use AutoLoader;
	164	BEGIN {
	165	$use_XSLoader = 1 ;
	166	eval { require XSLoader } ;
	167
	168	if ($@) {
	169	$use_XSLoader = 0 ;
	170	require DynaLoader;
	171	@ISA = qw(DynaLoader);
	172	}
	173	}
	174
	175	push @ISA, qw(Tie::Hash Exporter);
	176	@EXPORT = qw(
	177	$DB_BTREE $DB_HASH $DB_RECNO
	178
	179	BTREEMAGIC
	180	BTREEVERSION
	181	DB_LOCK
	182	DB_SHMEM
	183	DB_TXN
	184	HASHMAGIC
	185	HASHVERSION
	186	MAX_PAGE_NUMBER
	187	MAX_PAGE_OFFSET
	188	MAX_REC_NUMBER
	189	RET_ERROR
	190	RET_SPECIAL
	191	RET_SUCCESS
	192	R_CURSOR
	193	R_DUP
	194	R_FIRST
	195	R_FIXEDLEN
	196	R_IAFTER
	197	R_IBEFORE
	198	R_LAST
	199	R_NEXT
	200	R_NOKEY
	201	R_NOOVERWRITE
	202	R_PREV
	203	R_RECNOSYNC
	204	R_SETCURSOR
	205	R_SNAPSHOT
	206	__R_UNUSED
	207
	208	);
	209
	210	sub AUTOLOAD {
	211	my($constname);
	212	($constname = $AUTOLOAD) =~ s/.*:://;
	213	my $val = constant($constname, @_ ? $_[0] : 0);
	214	if ($! != 0) {
	215	if ($! =~ /Invalid/ \|\| $!{EINVAL}) {
	216	$AutoLoader::AUTOLOAD = $AUTOLOAD;
	217	goto &AutoLoader::AUTOLOAD;
	218	}
	219	else {
	220	my($pack,$file,$line) = caller;
	221	croak "Your vendor has not defined DB macro $constname, used at $file line $line.
	222	";
	223	}
	224	}
	225	eval "sub $AUTOLOAD { $val }";
	226	goto &$AUTOLOAD;
	227	}
	228
	229
	230	eval {
	231	# Make all Fcntl O_XXX constants available for importing
	232	require Fcntl;
	233	my @O = grep /^O_/, @Fcntl::EXPORT;
	234	Fcntl->import(@O); # first we import what we want to export
	235	push(@EXPORT, @O);
	236	};
	237
	238	if ($use_XSLoader)
	239	{ XSLoader::load("DB_File", $VERSION)}
	240	else
	241	{ bootstrap DB_File $VERSION }
	242
	243	# Preloaded methods go here. Autoload methods go after __END__, and are
	244	# processed by the autosplit program.
	245
	246	sub tie_hash_or_array
	247	{
	248	my (@arg) = @_ ;
	249	my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ;
	250
	251	$arg[4] = tied %{ $arg[4] }
	252	if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ;
	253
	254	# make recno in Berkeley DB version 2 work like recno in version 1.
	255	if ($db_version > 1 and defined $arg[4] and $arg[4] =~ /RECNO/ and
	256	$arg[1] and ! -e $arg[1]) {
	257	open(FH, ">$arg[1]") or return undef ;
	258	close FH ;
	259	chmod $arg[3] ? $arg[3] : 0666 , $arg[1] ;
	260	}
	261
	262	DoTie_($tieHASH, @arg) ;
	263	}
	264
	265	sub TIEHASH
	266	{
	267	tie_hash_or_array(@_) ;
	268	}
	269
	270	sub TIEARRAY
	271	{
	272	tie_hash_or_array(@_) ;
	273	}
	274
	275	sub CLEAR
	276	{
	277	my $self = shift;
	278	my $key = 0 ;
	279	my $value = "" ;
	280	my $status = $self->seq($key, $value, R_FIRST());
	281	my @keys;
	282
	283	while ($status == 0) {
	284	push @keys, $key;
	285	$status = $self->seq($key, $value, R_NEXT());
	286	}
	287	foreach $key (reverse @keys) {
	288	my $s = $self->del($key);
	289	}
	290	}
	291
	292	sub EXTEND { }
	293
	294	sub STORESIZE
	295	{
	296	my $self = shift;
	297	my $length = shift ;
	298	my $current_length = $self->length() ;
	299
	300	if ($length < $current_length) {
	301	my $key ;
	302	for ($key = $current_length - 1 ; $key >= $length ; -- $key)
	303	{ $self->del($key) }
	304	}
	305	elsif ($length > $current_length) {
	306	$self->put($length-1, "") ;
	307	}
	308	}
	309
	310	sub find_dup
	311	{
	312	croak "Usage: \$db->find_dup(key,value)\n"
	313	unless @_ == 3 ;
	314
	315	my $db = shift ;
	316	my ($origkey, $value_wanted) = @_ ;
	317	my ($key, $value) = ($origkey, 0);
	318	my ($status) = 0 ;
	319
	320	for ($status = $db->seq($key, $value, R_CURSOR() ) ;
	321	$status == 0 ;
	322	$status = $db->seq($key, $value, R_NEXT() ) ) {
	323
	324	return 0 if $key eq $origkey and $value eq $value_wanted ;
	325	}
	326
	327	return $status ;
	328	}
	329
	330	sub del_dup
	331	{
	332	croak "Usage: \$db->del_dup(key,value)\n"
	333	unless @_ == 3 ;
	334
	335	my $db = shift ;
	336	my ($key, $value) = @_ ;
	337	my ($status) = $db->find_dup($key, $value) ;
	338	return $status if $status != 0 ;
	339
	340	$status = $db->del($key, R_CURSOR() ) ;
	341	return $status ;
	342	}
	343
	344	sub get_dup
	345	{
	346	croak "Usage: \$db->get_dup(key [,flag])\n"
	347	unless @_ == 2 or @_ == 3 ;
	348
	349	my $db = shift ;
	350	my $key = shift ;
	351	my $flag = shift ;
	352	my $value = 0 ;
	353	my $origkey = $key ;
	354	my $wantarray = wantarray ;
	355	my %values = () ;
	356	my @values = () ;
	357	my $counter = 0 ;
	358	my $status = 0 ;
	359
	360	# iterate through the database until either EOF ($status == 0)
	361	# or a different key is encountered ($key ne $origkey).
	362	for ($status = $db->seq($key, $value, R_CURSOR()) ;
	363	$status == 0 and $key eq $origkey ;
	364	$status = $db->seq($key, $value, R_NEXT()) ) {
	365
	366	# save the value or count number of matches
	367	if ($wantarray) {
	368	if ($flag)
	369	{ ++ $values{$value} }
	370	else
	371	{ push (@values, $value) }
	372	}
	373	else
	374	{ ++ $counter }
	375
	376	}
	377
	378	return ($wantarray ? ($flag ? %values : @values) : $counter) ;
	379	}
	380
	381
	382	1;
	383	__END__
	384
	385	=head1 NAME
	386
	387	DB_File - Perl5 access to Berkeley DB version 1.x
	388
	389	=head1 SYNOPSIS
	390
	391	use DB_File ;
	392
	393	[$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ;
	394	[$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ;
	395	[$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ;
	396
	397	$status = $X->del($key [, $flags]) ;
	398	$status = $X->put($key, $value [, $flags]) ;
	399	$status = $X->get($key, $value [, $flags]) ;
	400	$status = $X->seq($key, $value, $flags) ;
	401	$status = $X->sync([$flags]) ;
	402	$status = $X->fd ;
	403
	404	# BTREE only
	405	$count = $X->get_dup($key) ;
	406	@list = $X->get_dup($key) ;
	407	%list = $X->get_dup($key, 1) ;
	408	$status = $X->find_dup($key, $value) ;
	409	$status = $X->del_dup($key, $value) ;
	410
	411	# RECNO only
	412	$a = $X->length;
	413	$a = $X->pop ;
	414	$X->push(list);
	415	$a = $X->shift;
	416	$X->unshift(list);
	417
	418	# DBM Filters
	419	$old_filter = $db->filter_store_key ( sub { ... } ) ;
	420	$old_filter = $db->filter_store_value( sub { ... } ) ;
	421	$old_filter = $db->filter_fetch_key ( sub { ... } ) ;
	422	$old_filter = $db->filter_fetch_value( sub { ... } ) ;
	423
	424	untie %hash ;
	425	untie @array ;
	426
	427	=head1 DESCRIPTION
	428
	429	B<DB_File> is a module which allows Perl programs to make use of the
	430	facilities provided by Berkeley DB version 1.x (if you have a newer
	431	version of DB, see L<Using DB_File with Berkeley DB version 2 or 3>).
	432	It is assumed that you have a copy of the Berkeley DB manual pages at
	433	hand when reading this documentation. The interface defined here
	434	mirrors the Berkeley DB interface closely.
	435
	436	Berkeley DB is a C library which provides a consistent interface to a
	437	number of database formats. B<DB_File> provides an interface to all
	438	three of the database types currently supported by Berkeley DB.
	439
	440	The file types are:
	441
	442	=over 5
	443
	444	=item B<DB_HASH>
	445
	446	This database type allows arbitrary key/value pairs to be stored in data
	447	files. This is equivalent to the functionality provided by other
	448	hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though,
	449	the files created using DB_HASH are not compatible with any of the
	450	other packages mentioned.
	451
	452	A default hashing algorithm, which will be adequate for most
	453	applications, is built into Berkeley DB. If you do need to use your own
	454	hashing algorithm it is possible to write your own in Perl and have
	455	B<DB_File> use it instead.
	456
	457	=item B<DB_BTREE>
	458
	459	The btree format allows arbitrary key/value pairs to be stored in a
	460	sorted, balanced binary tree.
	461
	462	As with the DB_HASH format, it is possible to provide a user defined
	463	Perl routine to perform the comparison of keys. By default, though, the
	464	keys are stored in lexical order.
	465
	466	=item B<DB_RECNO>
	467
	468	DB_RECNO allows both fixed-length and variable-length flat text files
	469	to be manipulated using the same key/value pair interface as in DB_HASH
	470	and DB_BTREE. In this case the key will consist of a record (line)
	471	number.
	472
	473	=back
	474
	475	=head2 Using DB_File with Berkeley DB version 2 or 3
	476
	477	Although B<DB_File> is intended to be used with Berkeley DB version 1,
	478	it can also be used with version 2.or 3 In this case the interface is
	479	limited to the functionality provided by Berkeley DB 1.x. Anywhere the
	480	version 2 or 3 interface differs, B<DB_File> arranges for it to work
	481	like version 1. This feature allows B<DB_File> scripts that were built
	482	with version 1 to be migrated to version 2 or 3 without any changes.
	483
	484	If you want to make use of the new features available in Berkeley DB
	485	2.x or greater, use the Perl module B<BerkeleyDB> instead.
	486
	487	B<Note:> The database file format has changed in both Berkeley DB
	488	version 2 and 3. If you cannot recreate your databases, you must dump
	489	any existing databases with the C<db_dump185> utility that comes with
	490	Berkeley DB.
	491	Once you have rebuilt DB_File to use Berkeley DB version 2 or 3, your
	492	databases can be recreated using C<db_load>. Refer to the Berkeley DB
	493	documentation for further details.
	494
	495	Please read L<"COPYRIGHT"> before using version 2.x or 3.x of Berkeley
	496	DB with DB_File.
	497
	498	=head2 Interface to Berkeley DB
	499
	500	B<DB_File> allows access to Berkeley DB files using the tie() mechanism
	501	in Perl 5 (for full details, see L<perlfunc/tie()>). This facility
	502	allows B<DB_File> to access Berkeley DB files using either an
	503	associative array (for DB_HASH & DB_BTREE file types) or an ordinary
	504	array (for the DB_RECNO file type).
	505
	506	In addition to the tie() interface, it is also possible to access most
	507	of the functions provided in the Berkeley DB API directly.
	508	See L<THE API INTERFACE>.
	509
	510	=head2 Opening a Berkeley DB Database File
	511
	512	Berkeley DB uses the function dbopen() to open or create a database.
	513	Here is the C prototype for dbopen():
	514
	515	DB*
	516	dbopen (const char * file, int flags, int mode,
	517	DBTYPE type, const void * openinfo)
	518
	519	The parameter C<type> is an enumeration which specifies which of the 3
	520	interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used.
	521	Depending on which of these is actually chosen, the final parameter,
	522	I<openinfo> points to a data structure which allows tailoring of the
	523	specific interface method.
	524
	525	This interface is handled slightly differently in B<DB_File>. Here is
	526	an equivalent call using B<DB_File>:
	527
	528	tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ;
	529
	530	The C<filename>, C<flags> and C<mode> parameters are the direct
	531	equivalent of their dbopen() counterparts. The final parameter $DB_HASH
	532	performs the function of both the C<type> and C<openinfo> parameters in
	533	dbopen().
	534
	535	In the example above $DB_HASH is actually a pre-defined reference to a
	536	hash object. B<DB_File> has three of these pre-defined references.
	537	Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO.
	538
	539	The keys allowed in each of these pre-defined references is limited to
	540	the names used in the equivalent C structure. So, for example, the
	541	$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>,
	542	C<ffactor>, C<hash>, C<lorder> and C<nelem>.
	543
	544	To change one of these elements, just assign to it like this:
	545
	546	$DB_HASH->{'cachesize'} = 10000 ;
	547
	548	The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are
	549	usually adequate for most applications. If you do need to create extra
	550	instances of these objects, constructors are available for each file
	551	type.
	552
	553	Here are examples of the constructors and the valid options available
	554	for DB_HASH, DB_BTREE and DB_RECNO respectively.
	555
	556	$a = new DB_File::HASHINFO ;
	557	$a->{'bsize'} ;
	558	$a->{'cachesize'} ;
	559	$a->{'ffactor'};
	560	$a->{'hash'} ;
	561	$a->{'lorder'} ;
	562	$a->{'nelem'} ;
	563
	564	$b = new DB_File::BTREEINFO ;
	565	$b->{'flags'} ;
	566	$b->{'cachesize'} ;
	567	$b->{'maxkeypage'} ;
	568	$b->{'minkeypage'} ;
	569	$b->{'psize'} ;
	570	$b->{'compare'} ;
	571	$b->{'prefix'} ;
	572	$b->{'lorder'} ;
	573
	574	$c = new DB_File::RECNOINFO ;
	575	$c->{'bval'} ;
	576	$c->{'cachesize'} ;
	577	$c->{'psize'} ;
	578	$c->{'flags'} ;
	579	$c->{'lorder'} ;
	580	$c->{'reclen'} ;
	581	$c->{'bfname'} ;
	582
	583	The values stored in the hashes above are mostly the direct equivalent
	584	of their C counterpart. Like their C counterparts, all are set to a
	585	default values - that means you don't have to set I<all> of the
	586	values when you only want to change one. Here is an example:
	587
	588	$a = new DB_File::HASHINFO ;
	589	$a->{'cachesize'} = 12345 ;
	590	tie %y, 'DB_File', "filename", $flags, 0777, $a ;
	591
	592	A few of the options need extra discussion here. When used, the C
	593	equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers
	594	to C functions. In B<DB_File> these keys are used to store references
	595	to Perl subs. Below are templates for each of the subs:
	596
	597	sub hash
	598	{
	599	my ($data) = @_ ;
	600	...
	601	# return the hash value for $data
	602	return $hash ;
	603	}
	604
	605	sub compare
	606	{
	607	my ($key, $key2) = @_ ;
	608	...
	609	# return 0 if $key1 eq $key2
	610	# -1 if $key1 lt $key2
	611	# 1 if $key1 gt $key2
	612	return (-1 , 0 or 1) ;
	613	}
	614
	615	sub prefix
	616	{
	617	my ($key, $key2) = @_ ;
	618	...
	619	# return number of bytes of $key2 which are
	620	# necessary to determine that it is greater than $key1
	621	return $bytes ;
	622	}
	623
	624	See L<Changing the BTREE sort order> for an example of using the
	625	C<compare> template.
	626
	627	If you are using the DB_RECNO interface and you intend making use of
	628	C<bval>, you should check out L<The 'bval' Option>.
	629
	630	=head2 Default Parameters
	631
	632	It is possible to omit some or all of the final 4 parameters in the
	633	call to C<tie> and let them take default values. As DB_HASH is the most
	634	common file format used, the call:
	635
	636	tie %A, "DB_File", "filename" ;
	637
	638	is equivalent to:
	639
	640	tie %A, "DB_File", "filename", O_CREAT\|O_RDWR, 0666, $DB_HASH ;
	641
	642	It is also possible to omit the filename parameter as well, so the
	643	call:
	644
	645	tie %A, "DB_File" ;
	646
	647	is equivalent to:
	648
	649	tie %A, "DB_File", undef, O_CREAT\|O_RDWR, 0666, $DB_HASH ;
	650
	651	See L<In Memory Databases> for a discussion on the use of C<undef>
	652	in place of a filename.
	653
	654	=head2 In Memory Databases
	655
	656	Berkeley DB allows the creation of in-memory databases by using NULL
	657	(that is, a C<(char *)0> in C) in place of the filename. B<DB_File>
	658	uses C<undef> instead of NULL to provide this functionality.
	659
	660	=head1 DB_HASH
	661
	662	The DB_HASH file format is probably the most commonly used of the three
	663	file formats that B<DB_File> supports. It is also very straightforward
	664	to use.
	665
	666	=head2 A Simple Example
	667
	668	This example shows how to create a database, add key/value pairs to the
	669	database, delete keys/value pairs and finally how to enumerate the
	670	contents of the database.
	671
	672	use warnings ;
	673	use strict ;
	674	use DB_File ;
	675	use vars qw( %h $k $v ) ;
	676
	677	unlink "fruit" ;
	678	tie %h, "DB_File", "fruit", O_RDWR\|O_CREAT, 0640, $DB_HASH
	679	or die "Cannot open file 'fruit': $!\n";
	680
	681	# Add a few key/value pairs to the file
	682	$h{"apple"} = "red" ;
	683	$h{"orange"} = "orange" ;
	684	$h{"banana"} = "yellow" ;
	685	$h{"tomato"} = "red" ;
	686
	687	# Check for existence of a key
	688	print "Banana Exists\n\n" if $h{"banana"} ;
	689
	690	# Delete a key/value pair.
	691	delete $h{"apple"} ;
	692
	693	# print the contents of the file
	694	while (($k, $v) = each %h)
	695	{ print "$k -> $v\n" }
	696
	697	untie %h ;
	698
	699	here is the output:
	700
	701	Banana Exists
	702
	703	orange -> orange
	704	tomato -> red
	705	banana -> yellow
	706
	707	Note that the like ordinary associative arrays, the order of the keys
	708	retrieved is in an apparently random order.
	709
	710	=head1 DB_BTREE
	711
	712	The DB_BTREE format is useful when you want to store data in a given
	713	order. By default the keys will be stored in lexical order, but as you
	714	will see from the example shown in the next section, it is very easy to
	715	define your own sorting function.
	716
	717	=head2 Changing the BTREE sort order
	718
	719	This script shows how to override the default sorting algorithm that
	720	BTREE uses. Instead of using the normal lexical ordering, a case
	721	insensitive compare function will be used.
	722
	723	use warnings ;
	724	use strict ;
	725	use DB_File ;
	726
	727	my %h ;
	728
	729	sub Compare
	730	{
	731	my ($key1, $key2) = @_ ;
	732	"\L$key1" cmp "\L$key2" ;
	733	}
	734
	735	# specify the Perl sub that will do the comparison
	736	$DB_BTREE->{'compare'} = \&Compare ;
	737
	738	unlink "tree" ;
	739	tie %h, "DB_File", "tree", O_RDWR\|O_CREAT, 0640, $DB_BTREE
	740	or die "Cannot open file 'tree': $!\n" ;
	741
	742	# Add a key/value pair to the file
	743	$h{'Wall'} = 'Larry' ;
	744	$h{'Smith'} = 'John' ;
	745	$h{'mouse'} = 'mickey' ;
	746	$h{'duck'} = 'donald' ;
	747
	748	# Delete
	749	delete $h{"duck"} ;
	750
	751	# Cycle through the keys printing them in order.
	752	# Note it is not necessary to sort the keys as
	753	# the btree will have kept them in order automatically.
	754	foreach (keys %h)
	755	{ print "$_\n" }
	756
	757	untie %h ;
	758
	759	Here is the output from the code above.
	760
	761	mouse
	762	Smith
	763	Wall
	764
	765	There are a few point to bear in mind if you want to change the
	766	ordering in a BTREE database:
	767
	768	=over 5
	769
	770	=item 1.
	771
	772	The new compare function must be specified when you create the database.
	773
	774	=item 2.
	775
	776	You cannot change the ordering once the database has been created. Thus
	777	you must use the same compare function every time you access the
	778	database.
	779
	780	=back
	781
	782	=head2 Handling Duplicate Keys
	783
	784	The BTREE file type optionally allows a single key to be associated
	785	with an arbitrary number of values. This option is enabled by setting
	786	the flags element of C<$DB_BTREE> to R_DUP when creating the database.
	787
	788	There are some difficulties in using the tied hash interface if you
	789	want to manipulate a BTREE database with duplicate keys. Consider this
	790	code:
	791
	792	use warnings ;
	793	use strict ;
	794	use DB_File ;
	795
	796	use vars qw($filename %h ) ;
	797
	798	$filename = "tree" ;
	799	unlink $filename ;
	800
	801	# Enable duplicate records
	802	$DB_BTREE->{'flags'} = R_DUP ;
	803
	804	tie %h, "DB_File", $filename, O_RDWR\|O_CREAT, 0640, $DB_BTREE
	805	or die "Cannot open $filename: $!\n";
	806
	807	# Add some key/value pairs to the file
	808	$h{'Wall'} = 'Larry' ;
	809	$h{'Wall'} = 'Brick' ; # Note the duplicate key
	810	$h{'Wall'} = 'Brick' ; # Note the duplicate key and value
	811	$h{'Smith'} = 'John' ;
	812	$h{'mouse'} = 'mickey' ;
	813
	814	# iterate through the associative array
	815	# and print each key/value pair.
	816	foreach (sort keys %h)
	817	{ print "$_ -> $h{$_}\n" }
	818
	819	untie %h ;
	820
	821	Here is the output:
	822
	823	Smith -> John
	824	Wall -> Larry
	825	Wall -> Larry
	826	Wall -> Larry
	827	mouse -> mickey
	828
	829	As you can see 3 records have been successfully created with key C<Wall>
	830	- the only thing is, when they are retrieved from the database they
	831	I<seem> to have the same value, namely C<Larry>. The problem is caused
	832	by the way that the associative array interface works. Basically, when
	833	the associative array interface is used to fetch the value associated
	834	with a given key, it will only ever retrieve the first value.
	835
	836	Although it may not be immediately obvious from the code above, the
	837	associative array interface can be used to write values with duplicate
	838	keys, but it cannot be used to read them back from the database.
	839
	840	The way to get around this problem is to use the Berkeley DB API method
	841	called C<seq>. This method allows sequential access to key/value
	842	pairs. See L<THE API INTERFACE> for details of both the C<seq> method
	843	and the API in general.
	844
	845	Here is the script above rewritten using the C<seq> API method.
	846
	847	use warnings ;
	848	use strict ;
	849	use DB_File ;
	850
	851	use vars qw($filename $x %h $status $key $value) ;
	852
	853	$filename = "tree" ;
	854	unlink $filename ;
	855
	856	# Enable duplicate records
	857	$DB_BTREE->{'flags'} = R_DUP ;
	858
	859	$x = tie %h, "DB_File", $filename, O_RDWR\|O_CREAT, 0640, $DB_BTREE
	860	or die "Cannot open $filename: $!\n";
	861
	862	# Add some key/value pairs to the file
	863	$h{'Wall'} = 'Larry' ;
	864	$h{'Wall'} = 'Brick' ; # Note the duplicate key
	865	$h{'Wall'} = 'Brick' ; # Note the duplicate key and value
	866	$h{'Smith'} = 'John' ;
	867	$h{'mouse'} = 'mickey' ;
	868
	869	# iterate through the btree using seq
	870	# and print each key/value pair.
	871	$key = $value = 0 ;
	872	for ($status = $x->seq($key, $value, R_FIRST) ;
	873	$status == 0 ;
	874	$status = $x->seq($key, $value, R_NEXT) )
	875	{ print "$key -> $value\n" }
	876
	877	undef $x ;
	878	untie %h ;
	879
	880	that prints:
	881
	882	Smith -> John
	883	Wall -> Brick
	884	Wall -> Brick
	885	Wall -> Larry
	886	mouse -> mickey
	887
	888	This time we have got all the key/value pairs, including the multiple
	889	values associated with the key C<Wall>.
	890
	891	To make life easier when dealing with duplicate keys, B<DB_File> comes with
	892	a few utility methods.
	893
	894	=head2 The get_dup() Method
	895
	896	The C<get_dup> method assists in
	897	reading duplicate values from BTREE databases. The method can take the
	898	following forms:
	899
	900	$count = $x->get_dup($key) ;
	901	@list = $x->get_dup($key) ;
	902	%list = $x->get_dup($key, 1) ;
	903
	904	In a scalar context the method returns the number of values associated
	905	with the key, C<$key>.
	906
	907	In list context, it returns all the values which match C<$key>. Note
	908	that the values will be returned in an apparently random order.
	909
	910	In list context, if the second parameter is present and evaluates
	911	TRUE, the method returns an associative array. The keys of the
	912	associative array correspond to the values that matched in the BTREE
	913	and the values of the array are a count of the number of times that
	914	particular value occurred in the BTREE.
	915
	916	So assuming the database created above, we can use C<get_dup> like
	917	this:
	918
	919	use warnings ;
	920	use strict ;
	921	use DB_File ;
	922
	923	use vars qw($filename $x %h ) ;
	924
	925	$filename = "tree" ;
	926
	927	# Enable duplicate records
	928	$DB_BTREE->{'flags'} = R_DUP ;
	929
	930	$x = tie %h, "DB_File", $filename, O_RDWR\|O_CREAT, 0640, $DB_BTREE
	931	or die "Cannot open $filename: $!\n";
	932
	933	my $cnt = $x->get_dup("Wall") ;
	934	print "Wall occurred $cnt times\n" ;
	935
	936	my %hash = $x->get_dup("Wall", 1) ;
	937	print "Larry is there\n" if $hash{'Larry'} ;
	938	print "There are $hash{'Brick'} Brick Walls\n" ;
	939
	940	my @list = sort $x->get_dup("Wall") ;
	941	print "Wall => [@list]\n" ;
	942
	943	@list = $x->get_dup("Smith") ;
	944	print "Smith => [@list]\n" ;
	945
	946	@list = $x->get_dup("Dog") ;
	947	print "Dog => [@list]\n" ;
	948
	949
	950	and it will print:
	951
	952	Wall occurred 3 times
	953	Larry is there
	954	There are 2 Brick Walls
	955	Wall => [Brick Brick Larry]
	956	Smith => [John]
	957	Dog => []
	958
	959	=head2 The find_dup() Method
	960
	961	$status = $X->find_dup($key, $value) ;
	962
	963	This method checks for the existence of a specific key/value pair. If the
	964	pair exists, the cursor is left pointing to the pair and the method
	965	returns 0. Otherwise the method returns a non-zero value.
	966
	967	Assuming the database from the previous example:
	968
	969	use warnings ;
	970	use strict ;
	971	use DB_File ;
	972
	973	use vars qw($filename $x %h $found) ;
	974
	975	my $filename = "tree" ;
	976
	977	# Enable duplicate records
	978	$DB_BTREE->{'flags'} = R_DUP ;
	979
	980	$x = tie %h, "DB_File", $filename, O_RDWR\|O_CREAT, 0640, $DB_BTREE
	981	or die "Cannot open $filename: $!\n";
	982
	983	$found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
	984	print "Larry Wall is $found there\n" ;
	985
	986	$found = ( $x->find_dup("Wall", "Harry") == 0 ? "" : "not") ;
	987	print "Harry Wall is $found there\n" ;
	988
	989	undef $x ;
	990	untie %h ;
	991
	992	prints this
	993
	994	Larry Wall is there
	995	Harry Wall is not there
	996
	997
	998	=head2 The del_dup() Method
	999
	1000	$status = $X->del_dup($key, $value) ;
	1001
	1002	This method deletes a specific key/value pair. It returns
	1003	0 if they exist and have been deleted successfully.
	1004	Otherwise the method returns a non-zero value.
	1005
	1006	Again assuming the existence of the C<tree> database
	1007
	1008	use warnings ;
	1009	use strict ;
	1010	use DB_File ;
	1011
	1012	use vars qw($filename $x %h $found) ;
	1013
	1014	my $filename = "tree" ;
	1015
	1016	# Enable duplicate records
	1017	$DB_BTREE->{'flags'} = R_DUP ;
	1018
	1019	$x = tie %h, "DB_File", $filename, O_RDWR\|O_CREAT, 0640, $DB_BTREE
	1020	or die "Cannot open $filename: $!\n";
	1021
	1022	$x->del_dup("Wall", "Larry") ;
	1023
	1024	$found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
	1025	print "Larry Wall is $found there\n" ;
	1026
	1027	undef $x ;
	1028	untie %h ;
	1029
	1030	prints this
	1031
	1032	Larry Wall is not there
	1033
	1034	=head2 Matching Partial Keys
	1035
	1036	The BTREE interface has a feature which allows partial keys to be
	1037	matched. This functionality is I<only> available when the C<seq> method
	1038	is used along with the R_CURSOR flag.
	1039
	1040	$x->seq($key, $value, R_CURSOR) ;
	1041
	1042	Here is the relevant quote from the dbopen man page where it defines
	1043	the use of the R_CURSOR flag with seq:
	1044
	1045	Note, for the DB_BTREE access method, the returned key is not
	1046	necessarily an exact match for the specified key. The returned key
	1047	is the smallest key greater than or equal to the specified key,
	1048	permitting partial key matches and range searches.
	1049
	1050	In the example script below, the C<match> sub uses this feature to find
	1051	and print the first matching key/value pair given a partial key.
	1052
	1053	use warnings ;
	1054	use strict ;
	1055	use DB_File ;
	1056	use Fcntl ;
	1057
	1058	use vars qw($filename $x %h $st $key $value) ;
	1059
	1060	sub match
	1061	{
	1062	my $key = shift ;
	1063	my $value = 0;
	1064	my $orig_key = $key ;
	1065	$x->seq($key, $value, R_CURSOR) ;
	1066	print "$orig_key\t-> $key\t-> $value\n" ;
	1067	}
	1068
	1069	$filename = "tree" ;
	1070	unlink $filename ;
	1071
	1072	$x = tie %h, "DB_File", $filename, O_RDWR\|O_CREAT, 0640, $DB_BTREE
	1073	or die "Cannot open $filename: $!\n";
	1074
	1075	# Add some key/value pairs to the file
	1076	$h{'mouse'} = 'mickey' ;
	1077	$h{'Wall'} = 'Larry' ;
	1078	$h{'Walls'} = 'Brick' ;
	1079	$h{'Smith'} = 'John' ;
	1080
	1081
	1082	$key = $value = 0 ;
	1083	print "IN ORDER\n" ;
	1084	for ($st = $x->seq($key, $value, R_FIRST) ;
	1085	$st == 0 ;
	1086	$st = $x->seq($key, $value, R_NEXT) )
	1087
	1088	{ print "$key -> $value\n" }
	1089
	1090	print "\nPARTIAL MATCH\n" ;
	1091
	1092	match "Wa" ;
	1093	match "A" ;
	1094	match "a" ;
	1095
	1096	undef $x ;
	1097	untie %h ;
	1098
	1099	Here is the output:
	1100
	1101	IN ORDER
	1102	Smith -> John
	1103	Wall -> Larry
	1104	Walls -> Brick
	1105	mouse -> mickey
	1106
	1107	PARTIAL MATCH
	1108	Wa -> Wall -> Larry
	1109	A -> Smith -> John
	1110	a -> mouse -> mickey
	1111
	1112	=head1 DB_RECNO
	1113
	1114	DB_RECNO provides an interface to flat text files. Both variable and
	1115	fixed length records are supported.
	1116
	1117	In order to make RECNO more compatible with Perl, the array offset for
	1118	all RECNO arrays begins at 0 rather than 1 as in Berkeley DB.
	1119
	1120	As with normal Perl arrays, a RECNO array can be accessed using
	1121	negative indexes. The index -1 refers to the last element of the array,
	1122	-2 the second last, and so on. Attempting to access an element before
	1123	the start of the array will raise a fatal run-time error.
	1124
	1125	=head2 The 'bval' Option
	1126
	1127	The operation of the bval option warrants some discussion. Here is the
	1128	definition of bval from the Berkeley DB 1.85 recno manual page:
	1129
	1130	The delimiting byte to be used to mark the end of a
	1131	record for variable-length records, and the pad charac-
	1132	ter for fixed-length records. If no value is speci-
	1133	fied, newlines (``\n'') are used to mark the end of
	1134	variable-length records and fixed-length records are
	1135	padded with spaces.
	1136
	1137	The second sentence is wrong. In actual fact bval will only default to
	1138	C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL
	1139	openinfo parameter is used at all, the value that happens to be in bval
	1140	will be used. That means you always have to specify bval when making
	1141	use of any of the options in the openinfo parameter. This documentation
	1142	error will be fixed in the next release of Berkeley DB.
	1143
	1144	That clarifies the situation with regards Berkeley DB itself. What
	1145	about B<DB_File>? Well, the behavior defined in the quote above is
	1146	quite useful, so B<DB_File> conforms to it.
	1147
	1148	That means that you can specify other options (e.g. cachesize) and
	1149	still have bval default to C<"\n"> for variable length records, and
	1150	space for fixed length records.
	1151
	1152	=head2 A Simple Example
	1153
	1154	Here is a simple example that uses RECNO (if you are using a version
	1155	of Perl earlier than 5.004_57 this example won't work -- see
	1156	L<Extra RECNO Methods> for a workaround).
	1157
	1158	use warnings ;
	1159	use strict ;
	1160	use DB_File ;
	1161
	1162	my $filename = "text" ;
	1163	unlink $filename ;
	1164
	1165	my @h ;
	1166	tie @h, "DB_File", $filename, O_RDWR\|O_CREAT, 0640, $DB_RECNO
	1167	or die "Cannot open file 'text': $!\n" ;
	1168
	1169	# Add a few key/value pairs to the file
	1170	$h[0] = "orange" ;
	1171	$h[1] = "blue" ;
	1172	$h[2] = "yellow" ;
	1173
	1174	push @h, "green", "black" ;
	1175
	1176	my $elements = scalar @h ;
	1177	print "The array contains $elements entries\n" ;
	1178
	1179	my $last = pop @h ;
	1180	print "popped $last\n" ;
	1181
	1182	unshift @h, "white" ;
	1183	my $first = shift @h ;
	1184	print "shifted $first\n" ;
	1185
	1186	# Check for existence of a key
	1187	print "Element 1 Exists with value $h[1]\n" if $h[1] ;
	1188
	1189	# use a negative index
	1190	print "The last element is $h[-1]\n" ;
	1191	print "The 2nd last element is $h[-2]\n" ;
	1192
	1193	untie @h ;
	1194
	1195	Here is the output from the script:
	1196
	1197	The array contains 5 entries
	1198	popped black
	1199	shifted white
	1200	Element 1 Exists with value blue
	1201	The last element is green
	1202	The 2nd last element is yellow
	1203
	1204	=head2 Extra RECNO Methods
	1205
	1206	If you are using a version of Perl earlier than 5.004_57, the tied
	1207	array interface is quite limited. In the example script above
	1208	C<push>, C<pop>, C<shift>, C<unshift>
	1209	or determining the array length will not work with a tied array.
	1210
	1211	To make the interface more useful for older versions of Perl, a number
	1212	of methods are supplied with B<DB_File> to simulate the missing array
	1213	operations. All these methods are accessed via the object returned from
	1214	the tie call.
	1215
	1216	Here are the methods:
	1217
	1218	=over 5
	1219
	1220	=item B<$X-E<gt>push(list) ;>
	1221
	1222	Pushes the elements of C<list> to the end of the array.
	1223
	1224	=item B<$value = $X-E<gt>pop ;>
	1225
	1226	Removes and returns the last element of the array.
	1227
	1228	=item B<$X-E<gt>shift>
	1229
	1230	Removes and returns the first element of the array.
	1231
	1232	=item B<$X-E<gt>unshift(list) ;>
	1233
	1234	Pushes the elements of C<list> to the start of the array.
	1235
	1236	=item B<$X-E<gt>length>
	1237
	1238	Returns the number of elements in the array.
	1239
	1240	=back
	1241
	1242	=head2 Another Example
	1243
	1244	Here is a more complete example that makes use of some of the methods
	1245	described above. It also makes use of the API interface directly (see
	1246	L<THE API INTERFACE>).
	1247
	1248	use warnings ;
	1249	use strict ;
	1250	use vars qw(@h $H $file $i) ;
	1251	use DB_File ;
	1252	use Fcntl ;
	1253
	1254	$file = "text" ;
	1255
	1256	unlink $file ;
	1257
	1258	$H = tie @h, "DB_File", $file, O_RDWR\|O_CREAT, 0640, $DB_RECNO
	1259	or die "Cannot open file $file: $!\n" ;
	1260
	1261	# first create a text file to play with
	1262	$h[0] = "zero" ;
	1263	$h[1] = "one" ;
	1264	$h[2] = "two" ;
	1265	$h[3] = "three" ;
	1266	$h[4] = "four" ;
	1267
	1268
	1269	# Print the records in order.
	1270	#
	1271	# The length method is needed here because evaluating a tied
	1272	# array in a scalar context does not return the number of
	1273	# elements in the array.
	1274
	1275	print "\nORIGINAL\n" ;
	1276	foreach $i (0 .. $H->length - 1) {
	1277	print "$i: $h[$i]\n" ;
	1278	}
	1279
	1280	# use the push & pop methods
	1281	$a = $H->pop ;
	1282	$H->push("last") ;
	1283	print "\nThe last record was [$a]\n" ;
	1284
	1285	# and the shift & unshift methods
	1286	$a = $H->shift ;
	1287	$H->unshift("first") ;
	1288	print "The first record was [$a]\n" ;
	1289
	1290	# Use the API to add a new record after record 2.
	1291	$i = 2 ;
	1292	$H->put($i, "Newbie", R_IAFTER) ;
	1293
	1294	# and a new record before record 1.
	1295	$i = 1 ;
	1296	$H->put($i, "New One", R_IBEFORE) ;
	1297
	1298	# delete record 3
	1299	$H->del(3) ;
	1300
	1301	# now print the records in reverse order
	1302	print "\nREVERSE\n" ;
	1303	for ($i = $H->length - 1 ; $i >= 0 ; -- $i)
	1304	{ print "$i: $h[$i]\n" }
	1305
	1306	# same again, but use the API functions instead
	1307	print "\nREVERSE again\n" ;
	1308	my ($s, $k, $v) = (0, 0, 0) ;
	1309	for ($s = $H->seq($k, $v, R_LAST) ;
	1310	$s == 0 ;
	1311	$s = $H->seq($k, $v, R_PREV))
	1312	{ print "$k: $v\n" }
	1313
	1314	undef $H ;
	1315	untie @h ;
	1316
	1317	and this is what it outputs:
	1318
	1319	ORIGINAL
	1320	0: zero
	1321	1: one
	1322	2: two
	1323	3: three
	1324	4: four
	1325
	1326	The last record was [four]
	1327	The first record was [zero]
	1328
	1329	REVERSE
	1330	5: last
	1331	4: three
	1332	3: Newbie
	1333	2: one
	1334	1: New One
	1335	0: first
	1336
	1337	REVERSE again
	1338	5: last
	1339	4: three
	1340	3: Newbie
	1341	2: one
	1342	1: New One
	1343	0: first
	1344
	1345	Notes:
	1346
	1347	=over 5
	1348
	1349	=item 1.
	1350
	1351	Rather than iterating through the array, C<@h> like this:
	1352
	1353	foreach $i (@h)
	1354
	1355	it is necessary to use either this:
	1356
	1357	foreach $i (0 .. $H->length - 1)
	1358
	1359	or this:
	1360
	1361	for ($a = $H->get($k, $v, R_FIRST) ;
	1362	$a == 0 ;
	1363	$a = $H->get($k, $v, R_NEXT) )
	1364
	1365	=item 2.
	1366
	1367	Notice that both times the C<put> method was used the record index was
	1368	specified using a variable, C<$i>, rather than the literal value
	1369	itself. This is because C<put> will return the record number of the
	1370	inserted line via that parameter.
	1371
	1372	=back
	1373
	1374	=head1 THE API INTERFACE
	1375
	1376	As well as accessing Berkeley DB using a tied hash or array, it is also
	1377	possible to make direct use of most of the API functions defined in the
	1378	Berkeley DB documentation.
	1379
	1380	To do this you need to store a copy of the object returned from the tie.
	1381
	1382	$db = tie %hash, "DB_File", "filename" ;
	1383
	1384	Once you have done that, you can access the Berkeley DB API functions
	1385	as B<DB_File> methods directly like this:
	1386
	1387	$db->put($key, $value, R_NOOVERWRITE) ;
	1388
	1389	B<Important:> If you have saved a copy of the object returned from
	1390	C<tie>, the underlying database file will I<not> be closed until both
	1391	the tied variable is untied and all copies of the saved object are
	1392	destroyed.
	1393
	1394	use DB_File ;
	1395	$db = tie %hash, "DB_File", "filename"
	1396	or die "Cannot tie filename: $!" ;
	1397	...
	1398	undef $db ;
	1399	untie %hash ;
	1400
	1401	See L<The untie() Gotcha> for more details.
	1402
	1403	All the functions defined in L<dbopen> are available except for
	1404	close() and dbopen() itself. The B<DB_File> method interface to the
	1405	supported functions have been implemented to mirror the way Berkeley DB
	1406	works whenever possible. In particular note that:
	1407
	1408	=over 5
	1409
	1410	=item *
	1411
	1412	The methods return a status value. All return 0 on success.
	1413	All return -1 to signify an error and set C<$!> to the exact
	1414	error code. The return code 1 generally (but not always) means that the
	1415	key specified did not exist in the database.
	1416
	1417	Other return codes are defined. See below and in the Berkeley DB
	1418	documentation for details. The Berkeley DB documentation should be used
	1419	as the definitive source.
	1420
	1421	=item *
	1422
	1423	Whenever a Berkeley DB function returns data via one of its parameters,
	1424	the equivalent B<DB_File> method does exactly the same.
	1425
	1426	=item *
	1427
	1428	If you are careful, it is possible to mix API calls with the tied
	1429	hash/array interface in the same piece of code. Although only a few of
	1430	the methods used to implement the tied interface currently make use of
	1431	the cursor, you should always assume that the cursor has been changed
	1432	any time the tied hash/array interface is used. As an example, this
	1433	code will probably not do what you expect:
	1434
	1435	$X = tie %x, 'DB_File', $filename, O_RDWR\|O_CREAT, 0777, $DB_BTREE
	1436	or die "Cannot tie $filename: $!" ;
	1437
	1438	# Get the first key/value pair and set the cursor
	1439	$X->seq($key, $value, R_FIRST) ;
	1440
	1441	# this line will modify the cursor
	1442	$count = scalar keys %x ;
	1443
	1444	# Get the second key/value pair.
	1445	# oops, it didn't, it got the last key/value pair!
	1446	$X->seq($key, $value, R_NEXT) ;
	1447
	1448	The code above can be rearranged to get around the problem, like this:
	1449
	1450	$X = tie %x, 'DB_File', $filename, O_RDWR\|O_CREAT, 0777, $DB_BTREE
	1451	or die "Cannot tie $filename: $!" ;
	1452
	1453	# this line will modify the cursor
	1454	$count = scalar keys %x ;
	1455
	1456	# Get the first key/value pair and set the cursor
	1457	$X->seq($key, $value, R_FIRST) ;
	1458
	1459	# Get the second key/value pair.
	1460	# worked this time.
	1461	$X->seq($key, $value, R_NEXT) ;
	1462
	1463	=back
	1464
	1465	All the constants defined in L<dbopen> for use in the flags parameters
	1466	in the methods defined below are also available. Refer to the Berkeley
	1467	DB documentation for the precise meaning of the flags values.
	1468
	1469	Below is a list of the methods available.
	1470
	1471	=over 5
	1472
	1473	=item B<$status = $X-E<gt>get($key, $value [, $flags]) ;>
	1474
	1475	Given a key (C<$key>) this method reads the value associated with it
	1476	from the database. The value read from the database is returned in the
	1477	C<$value> parameter.
	1478
	1479	If the key does not exist the method returns 1.
	1480
	1481	No flags are currently defined for this method.
	1482
	1483	=item B<$status = $X-E<gt>put($key, $value [, $flags]) ;>
	1484
	1485	Stores the key/value pair in the database.
	1486
	1487	If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter
	1488	will have the record number of the inserted key/value pair set.
	1489
	1490	Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and
	1491	R_SETCURSOR.
	1492
	1493	=item B<$status = $X-E<gt>del($key [, $flags]) ;>
	1494
	1495	Removes all key/value pairs with key C<$key> from the database.
	1496
	1497	A return code of 1 means that the requested key was not in the
	1498	database.
	1499
	1500	R_CURSOR is the only valid flag at present.
	1501
	1502	=item B<$status = $X-E<gt>fd ;>
	1503
	1504	Returns the file descriptor for the underlying database.
	1505
	1506	See L<Locking: The Trouble with fd> for an explanation for why you should
	1507	not use C<fd> to lock your database.
	1508
	1509	=item B<$status = $X-E<gt>seq($key, $value, $flags) ;>
	1510
	1511	This interface allows sequential retrieval from the database. See
	1512	L<dbopen> for full details.
	1513
	1514	Both the C<$key> and C<$value> parameters will be set to the key/value
	1515	pair read from the database.
	1516
	1517	The flags parameter is mandatory. The valid flag values are R_CURSOR,
	1518	R_FIRST, R_LAST, R_NEXT and R_PREV.
	1519
	1520	=item B<$status = $X-E<gt>sync([$flags]) ;>
	1521
	1522	Flushes any cached buffers to disk.
	1523
	1524	R_RECNOSYNC is the only valid flag at present.
	1525
	1526	=back
	1527
	1528	=head1 DBM FILTERS
	1529
	1530	A DBM Filter is a piece of code that is be used when you I<always>
	1531	want to make the same transformation to all keys and/or values in a
	1532	DBM database.
	1533
	1534	There are four methods associated with DBM Filters. All work identically,
	1535	and each is used to install (or uninstall) a single DBM Filter. Each
	1536	expects a single parameter, namely a reference to a sub. The only
	1537	difference between them is the place that the filter is installed.
	1538
	1539	To summarise:
	1540
	1541	=over 5
	1542
	1543	=item B<filter_store_key>
	1544
	1545	If a filter has been installed with this method, it will be invoked
	1546	every time you write a key to a DBM database.
	1547
	1548	=item B<filter_store_value>
	1549
	1550	If a filter has been installed with this method, it will be invoked
	1551	every time you write a value to a DBM database.
	1552
	1553
	1554	=item B<filter_fetch_key>
	1555
	1556	If a filter has been installed with this method, it will be invoked
	1557	every time you read a key from a DBM database.
	1558
	1559	=item B<filter_fetch_value>
	1560
	1561	If a filter has been installed with this method, it will be invoked
	1562	every time you read a value from a DBM database.
	1563
	1564	=back
	1565
	1566	You can use any combination of the methods, from none, to all four.
	1567
	1568	All filter methods return the existing filter, if present, or C<undef>
	1569	in not.
	1570
	1571	To delete a filter pass C<undef> to it.
	1572
	1573	=head2 The Filter
	1574
	1575	When each filter is called by Perl, a local copy of C<$_> will contain
	1576	the key or value to be filtered. Filtering is achieved by modifying
	1577	the contents of C<$_>. The return code from the filter is ignored.
	1578
	1579	=head2 An Example -- the NULL termination problem.
	1580
	1581	Consider the following scenario. You have a DBM database
	1582	that you need to share with a third-party C application. The C application
	1583	assumes that I<all> keys and values are NULL terminated. Unfortunately
	1584	when Perl writes to DBM databases it doesn't use NULL termination, so
	1585	your Perl application will have to manage NULL termination itself. When
	1586	you write to the database you will have to use something like this:
	1587
	1588	$hash{"$key\0"} = "$value\0" ;
	1589
	1590	Similarly the NULL needs to be taken into account when you are considering
	1591	the length of existing keys/values.
	1592
	1593	It would be much better if you could ignore the NULL terminations issue
	1594	in the main application code and have a mechanism that automatically
	1595	added the terminating NULL to all keys and values whenever you write to
	1596	the database and have them removed when you read from the database. As I'm
	1597	sure you have already guessed, this is a problem that DBM Filters can
	1598	fix very easily.
	1599
	1600	use warnings ;
	1601	use strict ;
	1602	use DB_File ;
	1603
	1604	my %hash ;
	1605	my $filename = "/tmp/filt" ;
	1606	unlink $filename ;
	1607
	1608	my $db = tie %hash, 'DB_File', $filename, O_CREAT\|O_RDWR, 0666, $DB_HASH
	1609	or die "Cannot open $filename: $!\n" ;
	1610
	1611	# Install DBM Filters
	1612	$db->filter_fetch_key ( sub { s/\0$// } ) ;
	1613	$db->filter_store_key ( sub { $_ .= "\0" } ) ;
	1614	$db->filter_fetch_value( sub { s/\0$// } ) ;
	1615	$db->filter_store_value( sub { $_ .= "\0" } ) ;
	1616
	1617	$hash{"abc"} = "def" ;
	1618	my $a = $hash{"ABC"} ;
	1619	# ...
	1620	undef $db ;
	1621	untie %hash ;
	1622
	1623	Hopefully the contents of each of the filters should be
	1624	self-explanatory. Both "fetch" filters remove the terminating NULL,
	1625	and both "store" filters add a terminating NULL.
	1626
	1627
	1628	=head2 Another Example -- Key is a C int.
	1629
	1630	Here is another real-life example. By default, whenever Perl writes to
	1631	a DBM database it always writes the key and value as strings. So when
	1632	you use this:
	1633
	1634	$hash{12345} = "soemthing" ;
	1635
	1636	the key 12345 will get stored in the DBM database as the 5 byte string
	1637	"12345". If you actually want the key to be stored in the DBM database
	1638	as a C int, you will have to use C<pack> when writing, and C<unpack>
	1639	when reading.
	1640
	1641	Here is a DBM Filter that does it:
	1642
	1643	use warnings ;
	1644	use strict ;
	1645	use DB_File ;
	1646	my %hash ;
	1647	my $filename = "/tmp/filt" ;
	1648	unlink $filename ;
	1649
	1650
	1651	my $db = tie %hash, 'DB_File', $filename, O_CREAT\|O_RDWR, 0666, $DB_HASH
	1652	or die "Cannot open $filename: $!\n" ;
	1653
	1654	$db->filter_fetch_key ( sub { $_ = unpack("i", $_) } ) ;
	1655	$db->filter_store_key ( sub { $_ = pack ("i", $_) } ) ;
	1656	$hash{123} = "def" ;
	1657	# ...
	1658	undef $db ;
	1659	untie %hash ;
	1660
	1661	This time only two filters have been used -- we only need to manipulate
	1662	the contents of the key, so it wasn't necessary to install any value
	1663	filters.
	1664
	1665	=head1 HINTS AND TIPS
	1666
	1667
	1668	=head2 Locking: The Trouble with fd
	1669
	1670	Until version 1.72 of this module, the recommended technique for locking
	1671	B<DB_File> databases was to flock the filehandle returned from the "fd"
	1672	function. Unfortunately this technique has been shown to be fundamentally
	1673	flawed (Kudos to David Harris for tracking this down). Use it at your own
	1674	peril!
	1675
	1676	The locking technique went like this.
	1677
	1678	$db = tie(%db, 'DB_File', '/tmp/foo.db', O_CREAT\|O_RDWR, 0644)
	1679	\|\| die "dbcreat /tmp/foo.db $!";
	1680	$fd = $db->fd;
	1681	open(DB_FH, "+<&=$fd") \|\| die "dup $!";
	1682	flock (DB_FH, LOCK_EX) \|\| die "flock: $!";
	1683	...
	1684	$db{"Tom"} = "Jerry" ;
	1685	...
	1686	flock(DB_FH, LOCK_UN);
	1687	undef $db;
	1688	untie %db;
	1689	close(DB_FH);
	1690
	1691	In simple terms, this is what happens:
	1692
	1693	=over 5
	1694
	1695	=item 1.
	1696
	1697	Use "tie" to open the database.
	1698
	1699	=item 2.
	1700
	1701	Lock the database with fd & flock.
	1702
	1703	=item 3.
	1704
	1705	Read & Write to the database.
	1706
	1707	=item 4.
	1708
	1709	Unlock and close the database.
	1710
	1711	=back
	1712
	1713	Here is the crux of the problem. A side-effect of opening the B<DB_File>
	1714	database in step 2 is that an initial block from the database will get
	1715	read from disk and cached in memory.
	1716
	1717	To see why this is a problem, consider what can happen when two processes,
	1718	say "A" and "B", both want to update the same B<DB_File> database
	1719	using the locking steps outlined above. Assume process "A" has already
	1720	opened the database and has a write lock, but it hasn't actually updated
	1721	the database yet (it has finished step 2, but not started step 3 yet). Now
	1722	process "B" tries to open the same database - step 1 will succeed,
	1723	but it will block on step 2 until process "A" releases the lock. The
	1724	important thing to notice here is that at this point in time both
	1725	processes will have cached identical initial blocks from the database.
	1726
	1727	Now process "A" updates the database and happens to change some of the
	1728	data held in the initial buffer. Process "A" terminates, flushing
	1729	all cached data to disk and releasing the database lock. At this point
	1730	the database on disk will correctly reflect the changes made by process
	1731	"A".
	1732
	1733	With the lock released, process "B" can now continue. It also updates the
	1734	database and unfortunately it too modifies the data that was in its
	1735	initial buffer. Once that data gets flushed to disk it will overwrite
	1736	some/all of the changes process "A" made to the database.
	1737
	1738	The result of this scenario is at best a database that doesn't contain
	1739	what you expect. At worst the database will corrupt.
	1740
	1741	The above won't happen every time competing process update the same
	1742	B<DB_File> database, but it does illustrate why the technique should
	1743	not be used.
	1744
	1745	=head2 Safe ways to lock a database
	1746
	1747	Starting with version 2.x, Berkeley DB has internal support for locking.
	1748	The companion module to this one, B<BerkeleyDB>, provides an interface
	1749	to this locking functionality. If you are serious about locking
	1750	Berkeley DB databases, I strongly recommend using B<BerkeleyDB>.
	1751
	1752	If using B<BerkeleyDB> isn't an option, there are a number of modules
	1753	available on CPAN that can be used to implement locking. Each one
	1754	implements locking differently and has different goals in mind. It is
	1755	therefore worth knowing the difference, so that you can pick the right
	1756	one for your application. Here are the three locking wrappers:
	1757
	1758	=over 5
	1759
	1760	=item B<Tie::DB_Lock>
	1761
	1762	A B<DB_File> wrapper which creates copies of the database file for
	1763	read access, so that you have a kind of a multiversioning concurrent read
	1764	system. However, updates are still serial. Use for databases where reads
	1765	may be lengthy and consistency problems may occur.
	1766
	1767	=item B<Tie::DB_LockFile>
	1768
	1769	A B<DB_File> wrapper that has the ability to lock and unlock the database
	1770	while it is being used. Avoids the tie-before-flock problem by simply
	1771	re-tie-ing the database when you get or drop a lock. Because of the
	1772	flexibility in dropping and re-acquiring the lock in the middle of a
	1773	session, this can be massaged into a system that will work with long
	1774	updates and/or reads if the application follows the hints in the POD
	1775	documentation.
	1776
	1777	=item B<DB_File::Lock>
	1778
	1779	An extremely lightweight B<DB_File> wrapper that simply flocks a lockfile
	1780	before tie-ing the database and drops the lock after the untie. Allows
	1781	one to use the same lockfile for multiple databases to avoid deadlock
	1782	problems, if desired. Use for databases where updates are reads are
	1783	quick and simple flock locking semantics are enough.
	1784
	1785	=back
	1786
	1787	=head2 Sharing Databases With C Applications
	1788
	1789	There is no technical reason why a Berkeley DB database cannot be
	1790	shared by both a Perl and a C application.
	1791
	1792	The vast majority of problems that are reported in this area boil down
	1793	to the fact that C strings are NULL terminated, whilst Perl strings are
	1794	not. See L<DBM FILTERS> for a generic way to work around this problem.
	1795
	1796	Here is a real example. Netscape 2.0 keeps a record of the locations you
	1797	visit along with the time you last visited them in a DB_HASH database.
	1798	This is usually stored in the file F<~/.netscape/history.db>. The key
	1799	field in the database is the location string and the value field is the
	1800	time the location was last visited stored as a 4 byte binary value.
	1801
	1802	If you haven't already guessed, the location string is stored with a
	1803	terminating NULL. This means you need to be careful when accessing the
	1804	database.
	1805
	1806	Here is a snippet of code that is loosely based on Tom Christiansen's
	1807	I<ggh> script (available from your nearest CPAN archive in
	1808	F<authors/id/TOMC/scripts/nshist.gz>).
	1809
	1810	use warnings ;
	1811	use strict ;
	1812	use DB_File ;
	1813	use Fcntl ;
	1814
	1815	use vars qw( $dotdir $HISTORY %hist_db $href $binary_time $date ) ;
	1816	$dotdir = $ENV{HOME} \|\| $ENV{LOGNAME};
	1817
	1818	$HISTORY = "$dotdir/.netscape/history.db";
	1819
	1820	tie %hist_db, 'DB_File', $HISTORY
	1821	or die "Cannot open $HISTORY: $!\n" ;;
	1822
	1823	# Dump the complete database
	1824	while ( ($href, $binary_time) = each %hist_db ) {
	1825
	1826	# remove the terminating NULL
	1827	$href =~ s/\x00$// ;
	1828
	1829	# convert the binary time into a user friendly string
	1830	$date = localtime unpack("V", $binary_time);
	1831	print "$date $href\n" ;
	1832	}
	1833
	1834	# check for the existence of a specific key
	1835	# remember to add the NULL
	1836	if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) {
	1837	$date = localtime unpack("V", $binary_time) ;
	1838	print "Last visited mox.perl.com on $date\n" ;
	1839	}
	1840	else {
	1841	print "Never visited mox.perl.com\n"
	1842	}
	1843
	1844	untie %hist_db ;
	1845
	1846	=head2 The untie() Gotcha
	1847
	1848	If you make use of the Berkeley DB API, it is I<very> strongly
	1849	recommended that you read L<perltie/The untie Gotcha>.
	1850
	1851	Even if you don't currently make use of the API interface, it is still
	1852	worth reading it.
	1853
	1854	Here is an example which illustrates the problem from a B<DB_File>
	1855	perspective:
	1856
	1857	use DB_File ;
	1858	use Fcntl ;
	1859
	1860	my %x ;
	1861	my $X ;
	1862
	1863	$X = tie %x, 'DB_File', 'tst.fil' , O_RDWR\|O_TRUNC
	1864	or die "Cannot tie first time: $!" ;
	1865
	1866	$x{123} = 456 ;
	1867
	1868	untie %x ;
	1869
	1870	tie %x, 'DB_File', 'tst.fil' , O_RDWR\|O_CREAT
	1871	or die "Cannot tie second time: $!" ;
	1872
	1873	untie %x ;
	1874
	1875	When run, the script will produce this error message:
	1876
	1877	Cannot tie second time: Invalid argument at bad.file line 14.
	1878
	1879	Although the error message above refers to the second tie() statement
	1880	in the script, the source of the problem is really with the untie()
	1881	statement that precedes it.
	1882
	1883	Having read L<perltie> you will probably have already guessed that the
	1884	error is caused by the extra copy of the tied object stored in C<$X>.
	1885	If you haven't, then the problem boils down to the fact that the
	1886	B<DB_File> destructor, DESTROY, will not be called until I<all>
	1887	references to the tied object are destroyed. Both the tied variable,
	1888	C<%x>, and C<$X> above hold a reference to the object. The call to
	1889	untie() will destroy the first, but C<$X> still holds a valid
	1890	reference, so the destructor will not get called and the database file
	1891	F<tst.fil> will remain open. The fact that Berkeley DB then reports the
	1892	attempt to open a database that is already open via the catch-all
	1893	"Invalid argument" doesn't help.
	1894
	1895	If you run the script with the C<-w> flag the error message becomes:
	1896
	1897	untie attempted while 1 inner references still exist at bad.file line 12.
	1898	Cannot tie second time: Invalid argument at bad.file line 14.
	1899
	1900	which pinpoints the real problem. Finally the script can now be
	1901	modified to fix the original problem by destroying the API object
	1902	before the untie:
	1903
	1904	...
	1905	$x{123} = 456 ;
	1906
	1907	undef $X ;
	1908	untie %x ;
	1909
	1910	$X = tie %x, 'DB_File', 'tst.fil' , O_RDWR\|O_CREAT
	1911	...
	1912
	1913
	1914	=head1 COMMON QUESTIONS
	1915
	1916	=head2 Why is there Perl source in my database?
	1917
	1918	If you look at the contents of a database file created by DB_File,
	1919	there can sometimes be part of a Perl script included in it.
	1920
	1921	This happens because Berkeley DB uses dynamic memory to allocate
	1922	buffers which will subsequently be written to the database file. Being
	1923	dynamic, the memory could have been used for anything before DB
	1924	malloced it. As Berkeley DB doesn't clear the memory once it has been
	1925	allocated, the unused portions will contain random junk. In the case
	1926	where a Perl script gets written to the database, the random junk will
	1927	correspond to an area of dynamic memory that happened to be used during
	1928	the compilation of the script.
	1929
	1930	Unless you don't like the possibility of there being part of your Perl
	1931	scripts embedded in a database file, this is nothing to worry about.
	1932
	1933	=head2 How do I store complex data structures with DB_File?
	1934
	1935	Although B<DB_File> cannot do this directly, there is a module which
	1936	can layer transparently over B<DB_File> to accomplish this feat.
	1937
	1938	Check out the MLDBM module, available on CPAN in the directory
	1939	F<modules/by-module/MLDBM>.
	1940
	1941	=head2 What does "Invalid Argument" mean?
	1942
	1943	You will get this error message when one of the parameters in the
	1944	C<tie> call is wrong. Unfortunately there are quite a few parameters to
	1945	get wrong, so it can be difficult to figure out which one it is.
	1946
	1947	Here are a couple of possibilities:
	1948
	1949	=over 5
	1950
	1951	=item 1.
	1952
	1953	Attempting to reopen a database without closing it.
	1954
	1955	=item 2.
	1956
	1957	Using the O_WRONLY flag.
	1958
	1959	=back
	1960
	1961	=head2 What does "Bareword 'DB_File' not allowed" mean?
	1962
	1963	You will encounter this particular error message when you have the
	1964	C<strict 'subs'> pragma (or the full strict pragma) in your script.
	1965	Consider this script:
	1966
	1967	use warnings ;
	1968	use strict ;
	1969	use DB_File ;
	1970	use vars qw(%x) ;
	1971	tie %x, DB_File, "filename" ;
	1972
	1973	Running it produces the error in question:
	1974
	1975	Bareword "DB_File" not allowed while "strict subs" in use
	1976
	1977	To get around the error, place the word C<DB_File> in either single or
	1978	double quotes, like this:
	1979
	1980	tie %x, "DB_File", "filename" ;
	1981
	1982	Although it might seem like a real pain, it is really worth the effort
	1983	of having a C<use strict> in all your scripts.
	1984
	1985	=head1 REFERENCES
	1986
	1987	Articles that are either about B<DB_File> or make use of it.
	1988
	1989	=over 5
	1990
	1991	=item 1.
	1992
	1993	I<Full-Text Searching in Perl>, Tim Kientzle (tkientzle@ddj.com),
	1994	Dr. Dobb's Journal, Issue 295, January 1999, pp 34-41
	1995
	1996	=back
	1997
	1998	=head1 HISTORY
	1999
	2000	Moved to the Changes file.
	2001
	2002	=head1 BUGS
	2003
	2004	Some older versions of Berkeley DB had problems with fixed length
	2005	records using the RECNO file format. This problem has been fixed since
	2006	version 1.85 of Berkeley DB.
	2007
	2008	I am sure there are bugs in the code. If you do find any, or can
	2009	suggest any enhancements, I would welcome your comments.
	2010
	2011	=head1 AVAILABILITY
	2012
	2013	B<DB_File> comes with the standard Perl source distribution. Look in
	2014	the directory F<ext/DB_File>. Given the amount of time between releases
	2015	of Perl the version that ships with Perl is quite likely to be out of
	2016	date, so the most recent version can always be found on CPAN (see
	2017	L<perlmod/CPAN> for details), in the directory
	2018	F<modules/by-module/DB_File>.
	2019
	2020	This version of B<DB_File> will work with either version 1.x, 2.x or
	2021	3.x of Berkeley DB, but is limited to the functionality provided by
	2022	version 1.
	2023
	2024	The official web site for Berkeley DB is F<http://www.sleepycat.com>.
	2025	All versions of Berkeley DB are available there.
	2026
	2027	Alternatively, Berkeley DB version 1 is available at your nearest CPAN
	2028	archive in F<src/misc/db.1.85.tar.gz>.
	2029
	2030	If you are running IRIX, then get Berkeley DB version 1 from
	2031	F<http://reality.sgi.com/ariel>. It has the patches necessary to
	2032	compile properly on IRIX 5.3.
	2033
	2034	=head1 COPYRIGHT
	2035
	2036	Copyright (c) 1995-1999 Paul Marquess. All rights reserved. This program
	2037	is free software; you can redistribute it and/or modify it under the
	2038	same terms as Perl itself.
	2039
	2040	Although B<DB_File> is covered by the Perl license, the library it
	2041	makes use of, namely Berkeley DB, is not. Berkeley DB has its own
	2042	copyright and its own license. Please take the time to read it.
	2043
	2044	Here are are few words taken from the Berkeley DB FAQ (at
	2045	F<http://www.sleepycat.com>) regarding the license:
	2046
	2047	Do I have to license DB to use it in Perl scripts?
	2048
	2049	No. The Berkeley DB license requires that software that uses
	2050	Berkeley DB be freely redistributable. In the case of Perl, that
	2051	software is Perl, and not your scripts. Any Perl scripts that you
	2052	write are your property, including scripts that make use of
	2053	Berkeley DB. Neither the Perl license nor the Berkeley DB license
	2054	place any restriction on what you may do with them.
	2055
	2056	If you are in any doubt about the license situation, contact either the
	2057	Berkeley DB authors or the author of DB_File. See L<"AUTHOR"> for details.
	2058
	2059
	2060	=head1 SEE ALSO
	2061
	2062	L<perl(1)>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>,
	2063	L<dbmfilter>
	2064
	2065	=head1 AUTHOR
	2066
	2067	The DB_File interface was written by Paul Marquess
	2068	E<lt>Paul.Marquess@btinternet.comE<gt>.
	2069	Questions about the DB system itself may be addressed to
	2070	E<lt>db@sleepycat.com<gt>.
	2071
	2072	=cut