perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	# -- mode: perl; perl-indent-level: 2; --
	2	# Memoize.pm
	3	#
	4	# Transparent memoization of idempotent functions
	5	#
	6	# Copyright 1998, 1999, 2000, 2001 M-J. Dominus.
	7	# You may copy and distribute this program under the
	8	# same terms as Perl itself. If in doubt,
	9	# write to mjd-perl-memoize+@plover.com for a license.
	10	#
	11	# Version 1.01 $Revision: 1.18 $ $Date: 2001/06/24 17:16:47 $
	12
	13	package Memoize;
	14	$VERSION = '1.01_01';
	15
	16	# Compile-time constants
	17	sub SCALAR () { 0 }
	18	sub LIST () { 1 }
	19
	20
	21	#
	22	# Usage memoize(functionname/ref,
	23	# { NORMALIZER => coderef, INSTALL => name,
	24	# LIST_CACHE => descriptor, SCALAR_CACHE => descriptor }
	25	#
	26
	27	use Carp;
	28	use Exporter;
	29	use vars qw($DEBUG);
	30	use Config; # Dammit.
	31	@ISA = qw(Exporter);
	32	@EXPORT = qw(memoize);
	33	@EXPORT_OK = qw(unmemoize flush_cache);
	34	use strict;
	35
	36	my %memotable;
	37	my %revmemotable;
	38	my @CONTEXT_TAGS = qw(MERGE TIE MEMORY FAULT HASH);
	39	my %IS_CACHE_TAG = map {($_ => 1)} @CONTEXT_TAGS;
	40
	41	# Raise an error if the user tries to specify one of thesepackage as a
	42	# tie for LIST_CACHE
	43
	44	my %scalar_only = map {($_ => 1)} qw(DB_File GDBM_File SDBM_File ODBM_File NDBM_File);
	45
	46	sub memoize {
	47	my $fn = shift;
	48	my %options = @_;
	49	my $options = \%options;
	50
	51	unless (defined($fn) &&
	52	(ref $fn eq 'CODE' \|\| ref $fn eq '')) {
	53	croak "Usage: memoize 'functionname'\|coderef {OPTIONS}";
	54	}
	55
	56	my $uppack = caller; # TCL me Elmo!
	57	my $cref; # Code reference to original function
	58	my $name = (ref $fn ? undef : $fn);
	59
	60	# Convert function names to code references
	61	$cref = &_make_cref($fn, $uppack);
	62
	63	# Locate function prototype, if any
	64	my $proto = prototype $cref;
	65	if (defined $proto) { $proto = "($proto)" }
	66	else { $proto = "" }
	67
	68	# I would like to get rid of the eval, but there seems not to be any
	69	# other way to set the prototype properly. The switch here for
	70	# 'usethreads' works around a bug in threadperl having to do with
	71	# magic goto. It would be better to fix the bug and use the magic
	72	# goto version everywhere.
	73	my $wrapper =
	74	$Config{usethreads}
	75	? eval "sub $proto { &_memoizer(\$cref, \@_); }"
	76	: eval "sub $proto { unshift \@_, \$cref; goto &_memoizer; }";
	77
	78	my $normalizer = $options{NORMALIZER};
	79	if (defined $normalizer && ! ref $normalizer) {
	80	$normalizer = _make_cref($normalizer, $uppack);
	81	}
	82
	83	my $install_name;
	84	if (defined $options->{INSTALL}) {
	85	# INSTALL => name
	86	$install_name = $options->{INSTALL};
	87	} elsif (! exists $options->{INSTALL}) {
	88	# No INSTALL option provided; use original name if possible
	89	$install_name = $name;
	90	} else {
	91	# INSTALL => undef means don't install
	92	}
	93
	94	if (defined $install_name) {
	95	$install_name = $uppack . '::' . $install_name
	96	unless $install_name =~ /::/;
	97	no strict;
	98	local($^W) = 0; # ``Subroutine $install_name redefined at ...''
	99	*{$install_name} = $wrapper; # Install memoized version
	100	}
	101
	102	$revmemotable{$wrapper} = "" . $cref; # Turn code ref into hash key
	103
	104	# These will be the caches
	105	my %caches;
	106	for my $context (qw(SCALAR LIST)) {
	107	# suppress subsequent 'uninitialized value' warnings
	108	$options{"${context}_CACHE"} \|\|= '';
	109
	110	my $cache_opt = $options{"${context}_CACHE"};
	111	my @cache_opt_args;
	112	if (ref $cache_opt) {
	113	@cache_opt_args = @$cache_opt;
	114	$cache_opt = shift @cache_opt_args;
	115	}
	116	if ($cache_opt eq 'FAULT') { # no cache
	117	$caches{$context} = undef;
	118	} elsif ($cache_opt eq 'HASH') { # user-supplied hash
	119	my $cache = $cache_opt_args[0];
	120	my $package = ref(tied %$cache);
	121	if ($context eq 'LIST' && $scalar_only{$package}) {
	122	croak("You can't use $package for LIST_CACHE because it can only store scalars");
	123	}
	124	$caches{$context} = $cache;
	125	} elsif ($cache_opt eq '' \|\| $IS_CACHE_TAG{$cache_opt}) {
	126	# default is that we make up an in-memory hash
	127	$caches{$context} = {};
	128	# (this might get tied later, or MERGEd away)
	129	} else {
	130	croak "Unrecognized option to `${context}_CACHE': `$cache_opt' should be one of (@CONTEXT_TAGS); aborting";
	131	}
	132	}
	133
	134	# Perhaps I should check here that you didn't supply both merge
	135	# options. But if you did, it does do something reasonable: They
	136	# both get merged to the same in-memory hash.
	137	if ($options{SCALAR_CACHE} eq 'MERGE') {
	138	$caches{SCALAR} = $caches{LIST};
	139	} elsif ($options{LIST_CACHE} eq 'MERGE') {
	140	$caches{LIST} = $caches{SCALAR};
	141	}
	142
	143	# Now deal with the TIE options
	144	{
	145	my $context;
	146	foreach $context (qw(SCALAR LIST)) {
	147	# If the relevant option wasn't `TIE', this call does nothing.
	148	_my_tie($context, $caches{$context}, $options); # Croaks on failure
	149	}
	150	}
	151
	152	# We should put some more stuff in here eventually.
	153	# We've been saying that for serveral versions now.
	154	# And you know what? More stuff keeps going in!
	155	$memotable{$cref} =
	156	{
	157	O => $options, # Short keys here for things we need to access frequently
	158	N => $normalizer,
	159	U => $cref,
	160	MEMOIZED => $wrapper,
	161	PACKAGE => $uppack,
	162	NAME => $install_name,
	163	S => $caches{SCALAR},
	164	L => $caches{LIST},
	165	};
	166
	167	$wrapper # Return just memoized version
	168	}
	169
	170	# This function tries to load a tied hash class and tie the hash to it.
	171	sub _my_tie {
	172	my ($context, $hash, $options) = @_;
	173	my $fullopt = $options->{"${context}_CACHE"};
	174
	175	# We already checked to make sure that this works.
	176	my $shortopt = (ref $fullopt) ? $fullopt->[0] : $fullopt;
	177
	178	return unless defined $shortopt && $shortopt eq 'TIE';
	179	carp("TIE option to memoize() is deprecated; use HASH instead")
	180	if $^W;
	181
	182	my @args = ref $fullopt ? @$fullopt : ();
	183	shift @args;
	184	my $module = shift @args;
	185	if ($context eq 'LIST' && $scalar_only{$module}) {
	186	croak("You can't use $module for LIST_CACHE because it can only store scalars");
	187	}
	188	my $modulefile = $module . '.pm';
	189	$modulefile =~ s{::}{/}g;
	190	eval { require $modulefile };
	191	if ($@) {
	192	croak "Memoize: Couldn't load hash tie module `$module': $@; aborting";
	193	}
	194	my $rc = (tie %$hash => $module, @args);
	195	unless ($rc) {
	196	croak "Memoize: Couldn't tie hash to `$module': $!; aborting";
	197	}
	198	1;
	199	}
	200
	201	sub flush_cache {
	202	my $func = _make_cref($_[0], scalar caller);
	203	my $info = $memotable{$revmemotable{$func}};
	204	die "$func not memoized" unless defined $info;
	205	for my $context (qw(S L)) {
	206	my $cache = $info->{$context};
	207	if (tied %$cache && ! (tied %$cache)->can('CLEAR')) {
	208	my $funcname = defined($info->{NAME}) ?
	209	"function $info->{NAME}" : "anonymous function $func";
	210	my $context = {S => 'scalar', L => 'list'}->{$context};
	211	croak "Tied cache hash for $context-context $funcname does not support flushing";
	212	} else {
	213	%$cache = ();
	214	}
	215	}
	216	}
	217
	218	# This is the function that manages the memo tables.
	219	sub _memoizer {
	220	my $orig = shift; # stringized version of ref to original func.
	221	my $info = $memotable{$orig};
	222	my $normalizer = $info->{N};
	223
	224	my $argstr;
	225	my $context = (wantarray() ? LIST : SCALAR);
	226
	227	if (defined $normalizer) {
	228	no strict;
	229	if ($context == SCALAR) {
	230	$argstr = &{$normalizer}(@_);
	231	} elsif ($context == LIST) {
	232	($argstr) = &{$normalizer}(@_);
	233	} else {
	234	croak "Internal error \#41; context was neither LIST nor SCALAR\n";
	235	}
	236	} else { # Default normalizer
	237	local $^W = 0;
	238	$argstr = join chr(28),@_;
	239	}
	240
	241	if ($context == SCALAR) {
	242	my $cache = $info->{S};
	243	_crap_out($info->{NAME}, 'scalar') unless $cache;
	244	if (exists $cache->{$argstr}) {
	245	return $cache->{$argstr};
	246	} else {
	247	my $val = &{$info->{U}}(@_);
	248	# Scalars are considered to be lists; store appropriately
	249	if ($info->{O}{SCALAR_CACHE} eq 'MERGE') {
	250	$cache->{$argstr} = [$val];
	251	} else {
	252	$cache->{$argstr} = $val;
	253	}
	254	$val;
	255	}
	256	} elsif ($context == LIST) {
	257	my $cache = $info->{L};
	258	_crap_out($info->{NAME}, 'list') unless $cache;
	259	if (exists $cache->{$argstr}) {
	260	my $val = $cache->{$argstr};
	261	# If LISTCONTEXT=>MERGE, then the function never returns lists,
	262	# so we have a scalar value cached, so just return it straightaway:
	263	return ($val) if $info->{O}{LIST_CACHE} eq 'MERGE';
	264	# Maybe in a later version we can use a faster test.
	265
	266	# Otherwise, we cached an array containing the returned list:
	267	return @$val;
	268	} else {
	269	my $q = $cache->{$argstr} = [&{$info->{U}}(@_)];
	270	@$q;
	271	}
	272	} else {
	273	croak "Internal error \#42; context was neither LIST nor SCALAR\n";
	274	}
	275	}
	276
	277	sub unmemoize {
	278	my $f = shift;
	279	my $uppack = caller;
	280	my $cref = _make_cref($f, $uppack);
	281
	282	unless (exists $revmemotable{$cref}) {
	283	croak "Could not unmemoize function `$f', because it was not memoized to begin with";
	284	}
	285
	286	my $tabent = $memotable{$revmemotable{$cref}};
	287	unless (defined $tabent) {
	288	croak "Could not figure out how to unmemoize function `$f'";
	289	}
	290	my $name = $tabent->{NAME};
	291	if (defined $name) {
	292	no strict;
	293	local($^W) = 0; # ``Subroutine $install_name redefined at ...''
	294	*{$name} = $tabent->{U}; # Replace with original function
	295	}
	296	undef $memotable{$revmemotable{$cref}};
	297	undef $revmemotable{$cref};
	298
	299	# This removes the last reference to the (possibly tied) memo tables
	300	# my ($old_function, $memotabs) = @{$tabent}{'U','S','L'};
	301	# undef $tabent;
	302
	303	# # Untie the memo tables if they were tied.
	304	# my $i;
	305	# for $i (0,1) {
	306	# if (tied %{$memotabs->[$i]}) {
	307	# warn "Untying hash #$i\n";
	308	# untie %{$memotabs->[$i]};
	309	# }
	310	# }
	311
	312	$tabent->{U};
	313	}
	314
	315	sub _make_cref {
	316	my $fn = shift;
	317	my $uppack = shift;
	318	my $cref;
	319	my $name;
	320
	321	if (ref $fn eq 'CODE') {
	322	$cref = $fn;
	323	} elsif (! ref $fn) {
	324	if ($fn =~ /::/) {
	325	$name = $fn;
	326	} else {
	327	$name = $uppack . '::' . $fn;
	328	}
	329	no strict;
	330	if (defined $name and !defined(&$name)) {
	331	croak "Cannot operate on nonexistent function `$fn'";
	332	}
	333	# $cref = \&$name;
	334	$cref = *{$name}{CODE};
	335	} else {
	336	my $parent = (caller(1))[3]; # Function that called _make_cref
	337	croak "Usage: argument 1 to `$parent' must be a function name or reference.\n";
	338	}
	339	$DEBUG and warn "${name}($fn) => $cref in _make_cref\n";
	340	$cref;
	341	}
	342
	343	sub _crap_out {
	344	my ($funcname, $context) = @_;
	345	if (defined $funcname) {
	346	croak "Function `$funcname' called in forbidden $context context; faulting";
	347	} else {
	348	croak "Anonymous function called in forbidden $context context; faulting";
	349	}
	350	}
	351
	352	1;
	353
	354
	355
	356
	357
	358	=head1 NAME
	359
	360	Memoize - Make functions faster by trading space for time
	361
	362	=head1 SYNOPSIS
	363
	364	# This is the documentation for Memoize 1.01
	365	use Memoize;
	366	memoize('slow_function');
	367	slow_function(arguments); # Is faster than it was before
	368
	369
	370	This is normally all you need to know. However, many options are available:
	371
	372	memoize(function, options...);
	373
	374	Options include:
	375
	376	NORMALIZER => function
	377	INSTALL => new_name
	378
	379	SCALAR_CACHE => 'MEMORY'
	380	SCALAR_CACHE => ['HASH', \%cache_hash ]
	381	SCALAR_CACHE => 'FAULT'
	382	SCALAR_CACHE => 'MERGE'
	383
	384	LIST_CACHE => 'MEMORY'
	385	LIST_CACHE => ['HASH', \%cache_hash ]
	386	LIST_CACHE => 'FAULT'
	387	LIST_CACHE => 'MERGE'
	388
	389	=head1 DESCRIPTION
	390
	391	`Memoizing' a function makes it faster by trading space for time. It
	392	does this by caching the return values of the function in a table.
	393	If you call the function again with the same arguments, C<memoize>
	394	jumps in and gives you the value out of the table, instead of letting
	395	the function compute the value all over again.
	396
	397	Here is an extreme example. Consider the Fibonacci sequence, defined
	398	by the following function:
	399
	400	# Compute Fibonacci numbers
	401	sub fib {
	402	my $n = shift;
	403	return $n if $n < 2;
	404	fib($n-1) + fib($n-2);
	405	}
	406
	407	This function is very slow. Why? To compute fib(14), it first wants
	408	to compute fib(13) and fib(12), and add the results. But to compute
	409	fib(13), it first has to compute fib(12) and fib(11), and then it
	410	comes back and computes fib(12) all over again even though the answer
	411	is the same. And both of the times that it wants to compute fib(12),
	412	it has to compute fib(11) from scratch, and then it has to do it
	413	again each time it wants to compute fib(13). This function does so
	414	much recomputing of old results that it takes a really long time to
	415	run---fib(14) makes 1,200 extra recursive calls to itself, to compute
	416	and recompute things that it already computed.
	417
	418	This function is a good candidate for memoization. If you memoize the
	419	`fib' function above, it will compute fib(14) exactly once, the first
	420	time it needs to, and then save the result in a table. Then if you
	421	ask for fib(14) again, it gives you the result out of the table.
	422	While computing fib(14), instead of computing fib(12) twice, it does
	423	it once; the second time it needs the value it gets it from the table.
	424	It doesn't compute fib(11) four times; it computes it once, getting it
	425	from the table the next three times. Instead of making 1,200
	426	recursive calls to `fib', it makes 15. This makes the function about
	427	150 times faster.
	428
	429	You could do the memoization yourself, by rewriting the function, like
	430	this:
	431
	432	# Compute Fibonacci numbers, memoized version
	433	{ my @fib;
	434	sub fib {
	435	my $n = shift;
	436	return $fib[$n] if defined $fib[$n];
	437	return $fib[$n] = $n if $n < 2;
	438	$fib[$n] = fib($n-1) + fib($n-2);
	439	}
	440	}
	441
	442	Or you could use this module, like this:
	443
	444	use Memoize;
	445	memoize('fib');
	446
	447	# Rest of the fib function just like the original version.
	448
	449	This makes it easy to turn memoizing on and off.
	450
	451	Here's an even simpler example: I wrote a simple ray tracer; the
	452	program would look in a certain direction, figure out what it was
	453	looking at, and then convert the `color' value (typically a string
	454	like `red') of that object to a red, green, and blue pixel value, like
	455	this:
	456
	457	for ($direction = 0; $direction < 300; $direction++) {
	458	# Figure out which object is in direction $direction
	459	$color = $object->{color};
	460	($r, $g, $b) = @{&ColorToRGB($color)};
	461	...
	462	}
	463
	464	Since there are relatively few objects in a picture, there are only a
	465	few colors, which get looked up over and over again. Memoizing
	466	C<ColorToRGB> sped up the program by several percent.
	467
	468	=head1 DETAILS
	469
	470	This module exports exactly one function, C<memoize>. The rest of the
	471	functions in this package are None of Your Business.
	472
	473	You should say
	474
	475	memoize(function)
	476
	477	where C<function> is the name of the function you want to memoize, or
	478	a reference to it. C<memoize> returns a reference to the new,
	479	memoized version of the function, or C<undef> on a non-fatal error.
	480	At present, there are no non-fatal errors, but there might be some in
	481	the future.
	482
	483	If C<function> was the name of a function, then C<memoize> hides the
	484	old version and installs the new memoized version under the old name,
	485	so that C<&function(...)> actually invokes the memoized version.
	486
	487	=head1 OPTIONS
	488
	489	There are some optional options you can pass to C<memoize> to change
	490	the way it behaves a little. To supply options, invoke C<memoize>
	491	like this:
	492
	493	memoize(function, NORMALIZER => function,
	494	INSTALL => newname,
	495	SCALAR_CACHE => option,
	496	LIST_CACHE => option
	497	);
	498
	499	Each of these options is optional; you can include some, all, or none
	500	of them.
	501
	502	=head2 INSTALL
	503
	504	If you supply a function name with C<INSTALL>, memoize will install
	505	the new, memoized version of the function under the name you give.
	506	For example,
	507
	508	memoize('fib', INSTALL => 'fastfib')
	509
	510	installs the memoized version of C<fib> as C<fastfib>; without the
	511	C<INSTALL> option it would have replaced the old C<fib> with the
	512	memoized version.
	513
	514	To prevent C<memoize> from installing the memoized version anywhere, use
	515	C<INSTALL =E<gt> undef>.
	516
	517	=head2 NORMALIZER
	518
	519	Suppose your function looks like this:
	520
	521	# Typical call: f('aha!', A => 11, B => 12);
	522	sub f {
	523	my $a = shift;
	524	my %hash = @_;
	525	$hash{B} \|\|= 2; # B defaults to 2
	526	$hash{C} \|\|= 7; # C defaults to 7
	527
	528	# Do something with $a, %hash
	529	}
	530
	531	Now, the following calls to your function are all completely equivalent:
	532
	533	f(OUCH);
	534	f(OUCH, B => 2);
	535	f(OUCH, C => 7);
	536	f(OUCH, B => 2, C => 7);
	537	f(OUCH, C => 7, B => 2);
	538	(etc.)
	539
	540	However, unless you tell C<Memoize> that these calls are equivalent,
	541	it will not know that, and it will compute the values for these
	542	invocations of your function separately, and store them separately.
	543
	544	To prevent this, supply a C<NORMALIZER> function that turns the
	545	program arguments into a string in a way that equivalent arguments
	546	turn into the same string. A C<NORMALIZER> function for C<f> above
	547	might look like this:
	548
	549	sub normalize_f {
	550	my $a = shift;
	551	my %hash = @_;
	552	$hash{B} \|\|= 2;
	553	$hash{C} \|\|= 7;
	554
	555	join(',', $a, map ($_ => $hash{$_}) sort keys %hash);
	556	}
	557
	558	Each of the argument lists above comes out of the C<normalize_f>
	559	function looking exactly the same, like this:
	560
	561	OUCH,B,2,C,7
	562
	563	You would tell C<Memoize> to use this normalizer this way:
	564
	565	memoize('f', NORMALIZER => 'normalize_f');
	566
	567	C<memoize> knows that if the normalized version of the arguments is
	568	the same for two argument lists, then it can safely look up the value
	569	that it computed for one argument list and return it as the result of
	570	calling the function with the other argument list, even if the
	571	argument lists look different.
	572
	573	The default normalizer just concatenates the arguments with character
	574	28 in between. (In ASCII, this is called FS or control-\.) This
	575	always works correctly for functions with only one string argument,
	576	and also when the arguments never contain character 28. However, it
	577	can confuse certain argument lists:
	578
	579	normalizer("a\034", "b")
	580	normalizer("a", "\034b")
	581	normalizer("a\034\034b")
	582
	583	for example.
	584
	585	Since hash keys are strings, the default normalizer will not
	586	distinguish between C<undef> and the empty string. It also won't work
	587	when the function's arguments are references. For example, consider a
	588	function C<g> which gets two arguments: A number, and a reference to
	589	an array of numbers:
	590
	591	g(13, [1,2,3,4,5,6,7]);
	592
	593	The default normalizer will turn this into something like
	594	C<"13\034ARRAY(0x436c1f)">. That would be all right, except that a
	595	subsequent array of numbers might be stored at a different location
	596	even though it contains the same data. If this happens, C<Memoize>
	597	will think that the arguments are different, even though they are
	598	equivalent. In this case, a normalizer like this is appropriate:
	599
	600	sub normalize { join ' ', $_[0], @{$_[1]} }
	601
	602	For the example above, this produces the key "13 1 2 3 4 5 6 7".
	603
	604	Another use for normalizers is when the function depends on data other
	605	than those in its arguments. Suppose you have a function which
	606	returns a value which depends on the current hour of the day:
	607
	608	sub on_duty {
	609	my ($problem_type) = @_;
	610	my $hour = (localtime)[2];
	611	open my $fh, "$DIR/$problem_type" or die...;
	612	my $line;
	613	while ($hour-- > 0)
	614	$line = <$fh>;
	615	}
	616	return $line;
	617	}
	618
	619	At 10:23, this function generates the 10th line of a data file; at
	620	3:45 PM it generates the 15th line instead. By default, C<Memoize>
	621	will only see the $problem_type argument. To fix this, include the
	622	current hour in the normalizer:
	623
	624	sub normalize { join ' ', (localtime)[2], @_ }
	625
	626	The calling context of the function (scalar or list context) is
	627	propagated to the normalizer. This means that if the memoized
	628	function will treat its arguments differently in list context than it
	629	would in scalar context, you can have the normalizer function select
	630	its behavior based on the results of C<wantarray>. Even if called in
	631	a list context, a normalizer should still return a single string.
	632
	633	=head2 C<SCALAR_CACHE>, C<LIST_CACHE>
	634
	635	Normally, C<Memoize> caches your function's return values into an
	636	ordinary Perl hash variable. However, you might like to have the
	637	values cached on the disk, so that they persist from one run of your
	638	program to the next, or you might like to associate some other
	639	interesting semantics with the cached values.
	640
	641	There's a slight complication under the hood of C<Memoize>: There are
	642	actually I<two> caches, one for scalar values and one for list values.
	643	When your function is called in scalar context, its return value is
	644	cached in one hash, and when your function is called in list context,
	645	its value is cached in the other hash. You can control the caching
	646	behavior of both contexts independently with these options.
	647
	648	The argument to C<LIST_CACHE> or C<SCALAR_CACHE> must either be one of
	649	the following four strings:
	650
	651	MEMORY
	652	FAULT
	653	MERGE
	654	HASH
	655
	656	or else it must be a reference to a list whose first element is one of
	657	these four strings, such as C<[HASH, arguments...]>.
	658
	659	=over 4
	660
	661	=item C<MEMORY>
	662
	663	C<MEMORY> means that return values from the function will be cached in
	664	an ordinary Perl hash variable. The hash variable will not persist
	665	after the program exits. This is the default.
	666
	667	=item C<HASH>
	668
	669	C<HASH> allows you to specify that a particular hash that you supply
	670	will be used as the cache. You can tie this hash beforehand to give
	671	it any behavior you want.
	672
	673	A tied hash can have any semantics at all. It is typically tied to an
	674	on-disk database, so that cached values are stored in the database and
	675	retrieved from it again when needed, and the disk file typically
	676	persists after your program has exited. See C<perltie> for more
	677	complete details about C<tie>.
	678
	679	A typical example is:
	680
	681	use DB_File;
	682	tie my %cache => 'DB_File', $filename, O_RDWR\|O_CREAT, 0666;
	683	memoize 'function', SCALAR_CACHE => [HASH => \%cache];
	684
	685	This has the effect of storing the cache in a C<DB_File> database
	686	whose name is in C<$filename>. The cache will persist after the
	687	program has exited. Next time the program runs, it will find the
	688	cache already populated from the previous run of the program. Or you
	689	can forcibly populate the cache by constructing a batch program that
	690	runs in the background and populates the cache file. Then when you
	691	come to run your real program the memoized function will be fast
	692	because all its results have been precomputed.
	693
	694	=item C<TIE>
	695
	696	This option is no longer supported. It is still documented only to
	697	aid in the debugging of old programs that use it. Old programs should
	698	be converted to use the C<HASH> option instead.
	699
	700	memoize ... [TIE, PACKAGE, ARGS...]
	701
	702	is merely a shortcut for
	703
	704	require PACKAGE;
	705	{ my %cache;
	706	tie %cache, PACKAGE, ARGS...;
	707	}
	708	memoize ... [HASH => \%cache];
	709
	710	=item C<FAULT>
	711
	712	C<FAULT> means that you never expect to call the function in scalar
	713	(or list) context, and that if C<Memoize> detects such a call, it
	714	should abort the program. The error message is one of
	715
	716	`foo' function called in forbidden list context at line ...
	717	`foo' function called in forbidden scalar context at line ...
	718
	719	=item C<MERGE>
	720
	721	C<MERGE> normally means the function does not distinguish between list
	722	and sclar context, and that return values in both contexts should be
	723	stored together. C<LIST_CACHE =E<gt> MERGE> means that list context
	724	return values should be stored in the same hash that is used for
	725	scalar context returns, and C<SCALAR_CACHE =E<gt> MERGE> means the
	726	same, mutatis mutandis. It is an error to specify C<MERGE> for both,
	727	but it probably does something useful.
	728
	729	Consider this function:
	730
	731	sub pi { 3; }
	732
	733	Normally, the following code will result in two calls to C<pi>:
	734
	735	$x = pi();
	736	($y) = pi();
	737	$z = pi();
	738
	739	The first call caches the value C<3> in the scalar cache; the second
	740	caches the list C<(3)> in the list cache. The third call doesn't call
	741	the real C<pi> function; it gets the value from the scalar cache.
	742
	743	Obviously, the second call to C<pi> is a waste of time, and storing
	744	its return value is a waste of space. Specifying C<LIST_CACHE =E<gt>
	745	MERGE> will make C<memoize> use the same cache for scalar and list
	746	context return values, so that the second call uses the scalar cache
	747	that was populated by the first call. C<pi> ends up being called only
	748	once, and both subsequent calls return C<3> from the cache, regardless
	749	of the calling context.
	750
	751	Another use for C<MERGE> is when you want both kinds of return values
	752	stored in the same disk file; this saves you from having to deal with
	753	two disk files instead of one. You can use a normalizer function to
	754	keep the two sets of return values separate. For example:
	755
	756	tie my %cache => 'MLDBM', 'DB_File', $filename, ...;
	757
	758	memoize 'myfunc',
	759	NORMALIZER => 'n',
	760	SCALAR_CACHE => [HASH => \%cache],
	761	LIST_CACHE => MERGE,
	762	;
	763
	764	sub n {
	765	my $context = wantarray() ? 'L' : 'S';
	766	# ... now compute the hash key from the arguments ...
	767	$hashkey = "$context:$hashkey";
	768	}
	769
	770	This normalizer function will store scalar context return values in
	771	the disk file under keys that begin with C<S:>, and list context
	772	return values under keys that begin with C<L:>.
	773
	774	=back
	775
	776	=head1 OTHER FACILITIES
	777
	778	=head2 C<unmemoize>
	779
	780	There's an C<unmemoize> function that you can import if you want to.
	781	Why would you want to? Here's an example: Suppose you have your cache
	782	tied to a DBM file, and you want to make sure that the cache is
	783	written out to disk if someone interrupts the program. If the program
	784	exits normally, this will happen anyway, but if someone types
	785	control-C or something then the program will terminate immediately
	786	without synchronizing the database. So what you can do instead is
	787
	788	$SIG{INT} = sub { unmemoize 'function' };
	789
	790	C<unmemoize> accepts a reference to, or the name of a previously
	791	memoized function, and undoes whatever it did to provide the memoized
	792	version in the first place, including making the name refer to the
	793	unmemoized version if appropriate. It returns a reference to the
	794	unmemoized version of the function.
	795
	796	If you ask it to unmemoize a function that was never memoized, it
	797	croaks.
	798
	799	=head2 C<flush_cache>
	800
	801	C<flush_cache(function)> will flush out the caches, discarding I<all>
	802	the cached data. The argument may be a function name or a reference
	803	to a function. For finer control over when data is discarded or
	804	expired, see the documentation for C<Memoize::Expire>, included in
	805	this package.
	806
	807	Note that if the cache is a tied hash, C<flush_cache> will attempt to
	808	invoke the C<CLEAR> method on the hash. If there is no C<CLEAR>
	809	method, this will cause a run-time error.
	810
	811	An alternative approach to cache flushing is to use the C<HASH> option
	812	(see above) to request that C<Memoize> use a particular hash variable
	813	as its cache. Then you can examine or modify the hash at any time in
	814	any way you desire. You may flush the cache by using C<%hash = ()>.
	815
	816	=head1 CAVEATS
	817
	818	Memoization is not a cure-all:
	819
	820	=over 4
	821
	822	=item *
	823
	824	Do not memoize a function whose behavior depends on program
	825	state other than its own arguments, such as global variables, the time
	826	of day, or file input. These functions will not produce correct
	827	results when memoized. For a particularly easy example:
	828
	829	sub f {
	830	time;
	831	}
	832
	833	This function takes no arguments, and as far as C<Memoize> is
	834	concerned, it always returns the same result. C<Memoize> is wrong, of
	835	course, and the memoized version of this function will call C<time> once
	836	to get the current time, and it will return that same time
	837	every time you call it after that.
	838
	839	=item *
	840
	841	Do not memoize a function with side effects.
	842
	843	sub f {
	844	my ($a, $b) = @_;
	845	my $s = $a + $b;
	846	print "$a + $b = $s.\n";
	847	}
	848
	849	This function accepts two arguments, adds them, and prints their sum.
	850	Its return value is the numuber of characters it printed, but you
	851	probably didn't care about that. But C<Memoize> doesn't understand
	852	that. If you memoize this function, you will get the result you
	853	expect the first time you ask it to print the sum of 2 and 3, but
	854	subsequent calls will return 1 (the return value of
	855	C<print>) without actually printing anything.
	856
	857	=item *
	858
	859	Do not memoize a function that returns a data structure that is
	860	modified by its caller.
	861
	862	Consider these functions: C<getusers> returns a list of users somehow,
	863	and then C<main> throws away the first user on the list and prints the
	864	rest:
	865
	866	sub main {
	867	my $userlist = getusers();
	868	shift @$userlist;
	869	foreach $u (@$userlist) {
	870	print "User $u\n";
	871	}
	872	}
	873
	874	sub getusers {
	875	my @users;
	876	# Do something to get a list of users;
	877	\@users; # Return reference to list.
	878	}
	879
	880	If you memoize C<getusers> here, it will work right exactly once. The
	881	reference to the users list will be stored in the memo table. C<main>
	882	will discard the first element from the referenced list. The next
	883	time you invoke C<main>, C<Memoize> will not call C<getusers>; it will
	884	just return the same reference to the same list it got last time. But
	885	this time the list has already had its head removed; C<main> will
	886	erroneously remove another element from it. The list will get shorter
	887	and shorter every time you call C<main>.
	888
	889	Similarly, this:
	890
	891	$u1 = getusers();
	892	$u2 = getusers();
	893	pop @$u1;
	894
	895	will modify $u2 as well as $u1, because both variables are references
	896	to the same array. Had C<getusers> not been memoized, $u1 and $u2
	897	would have referred to different arrays.
	898
	899	=item *
	900
	901	Do not memoize a very simple function.
	902
	903	Recently someone mentioned to me that the Memoize module made his
	904	program run slower instead of faster. It turned out that he was
	905	memoizing the following function:
	906
	907	sub square {
	908	$_[0] * $_[0];
	909	}
	910
	911	I pointed out that C<Memoize> uses a hash, and that looking up a
	912	number in the hash is necessarily going to take a lot longer than a
	913	single multiplication. There really is no way to speed up the
	914	C<square> function.
	915
	916	Memoization is not magical.
	917
	918	=back
	919
	920	=head1 PERSISTENT CACHE SUPPORT
	921
	922	You can tie the cache tables to any sort of tied hash that you want
	923	to, as long as it supports C<TIEHASH>, C<FETCH>, C<STORE>, and
	924	C<EXISTS>. For example,
	925
	926	tie my %cache => 'GDBM_File', $filename, O_RDWR\|O_CREAT, 0666;
	927	memoize 'function', SCALAR_CACHE => [HASH => \%cache];
	928
	929	works just fine. For some storage methods, you need a little glue.
	930
	931	C<SDBM_File> doesn't supply an C<EXISTS> method, so included in this
	932	package is a glue module called C<Memoize::SDBM_File> which does
	933	provide one. Use this instead of plain C<SDBM_File> to store your
	934	cache table on disk in an C<SDBM_File> database:
	935
	936	tie my %cache => 'Memoize::SDBM_File', $filename, O_RDWR\|O_CREAT, 0666;
	937	memoize 'function', SCALAR_CACHE => [HASH => \%cache];
	938
	939	C<NDBM_File> has the same problem and the same solution. (Use
	940	C<Memoize::NDBM_File instead of plain NDBM_File.>)
	941
	942	C<Storable> isn't a tied hash class at all. You can use it to store a
	943	hash to disk and retrieve it again, but you can't modify the hash while
	944	it's on the disk. So if you want to store your cache table in a
	945	C<Storable> database, use C<Memoize::Storable>, which puts a hashlike
	946	front-end onto C<Storable>. The hash table is actually kept in
	947	memory, and is loaded from your C<Storable> file at the time you
	948	memoize the function, and stored back at the time you unmemoize the
	949	function (or when your program exits):
	950
	951	tie my %cache => 'Memoize::Storable', $filename;
	952	memoize 'function', SCALAR_CACHE => [HASH => \%cache];
	953
	954	tie my %cache => 'Memoize::Storable', $filename, 'nstore';
	955	memoize 'function', SCALAR_CACHE => [HASH => \%cache];
	956
	957	Include the `nstore' option to have the C<Storable> database written
	958	in `network order'. (See L<Storable> for more details about this.)
	959
	960	The C<flush_cache()> function will raise a run-time error unless the
	961	tied package provides a C<CLEAR> method.
	962
	963	=head1 EXPIRATION SUPPORT
	964
	965	See Memoize::Expire, which is a plug-in module that adds expiration
	966	functionality to Memoize. If you don't like the kinds of policies
	967	that Memoize::Expire implements, it is easy to write your own plug-in
	968	module to implement whatever policy you desire. Memoize comes with
	969	several examples. An expiration manager that implements a LRU policy
	970	is available on CPAN as Memoize::ExpireLRU.
	971
	972	=head1 BUGS
	973
	974	The test suite is much better, but always needs improvement.
	975
	976	There is some problem with the way C<goto &f> works under threaded
	977	Perl, perhaps because of the lexical scoping of C<@_>. This is a bug
	978	in Perl, and until it is resolved, memoized functions will see a
	979	slightly different C<caller()> and will perform a little more slowly
	980	on threaded perls than unthreaded perls.
	981
	982	Some versions of C<DB_File> won't let you store data under a key of
	983	length 0. That means that if you have a function C<f> which you
	984	memoized and the cache is in a C<DB_File> database, then the value of
	985	C<f()> (C<f> called with no arguments) will not be memoized. If this
	986	is a big problem, you can supply a normalizer function that prepends
	987	C<"x"> to every key.
	988
	989	=head1 MAILING LIST
	990
	991	To join a very low-traffic mailing list for announcements about
	992	C<Memoize>, send an empty note to C<mjd-perl-memoize-request@plover.com>.
	993
	994	=head1 AUTHOR
	995
	996	Mark-Jason Dominus (C<mjd-perl-memoize+@plover.com>), Plover Systems co.
	997
	998	See the C<Memoize.pm> Page at http://www.plover.com/~mjd/perl/Memoize/
	999	for news and upgrades. Near this page, at
	1000	http://www.plover.com/~mjd/perl/MiniMemoize/ there is an article about
	1001	memoization and about the internals of Memoize that appeared in The
	1002	Perl Journal, issue #13. (This article is also included in the
	1003	Memoize distribution as `article.html'.)
	1004
	1005	My upcoming book will discuss memoization (and many other fascinating
	1006	topics) in tremendous detail. It will be published by Morgan Kaufmann
	1007	in 2002, possibly under the title I<Perl Advanced Techniques
	1008	Handbook>. It will also be available on-line for free. For more
	1009	information, visit http://perl.plover.com/book/ .
	1010
	1011	To join a mailing list for announcements about C<Memoize>, send an
	1012	empty message to C<mjd-perl-memoize-request@plover.com>. This mailing
	1013	list is for announcements only and has extremely low traffic---about
	1014	two messages per year.
	1015
	1016	=head1 COPYRIGHT AND LICENSE
	1017
	1018	Copyright 1998, 1999, 2000, 2001 by Mark Jason Dominus
	1019
	1020	This library is free software; you may redistribute it and/or modify
	1021	it under the same terms as Perl itself.
	1022
	1023	=head1 THANK YOU
	1024
	1025	Many thanks to Jonathan Roy for bug reports and suggestions, to
	1026	Michael Schwern for other bug reports and patches, to Mike Cariaso for
	1027	helping me to figure out the Right Thing to Do About Expiration, to
	1028	Joshua Gerth, Joshua Chamas, Jonathan Roy (again), Mark D. Anderson,
	1029	and Andrew Johnson for more suggestions about expiration, to Brent
	1030	Powers for the Memoize::ExpireLRU module, to Ariel Scolnicov for
	1031	delightful messages about the Fibonacci function, to Dion Almaer for
	1032	thought-provoking suggestions about the default normalizer, to Walt
	1033	Mankowski and Kurt Starsinic for much help investigating problems
	1034	under threaded Perl, to Alex Dudkevich for reporting the bug in
	1035	prototyped functions and for checking my patch, to Tony Bass for many
	1036	helpful suggestions, to Jonathan Roy (again) for finding a use for
	1037	C<unmemoize()>, to Philippe Verdret for enlightening discussion of
	1038	C<Hook::PrePostCall>, to Nat Torkington for advice I ignored, to Chris
	1039	Nandor for portability advice, to Randal Schwartz for suggesting the
	1040	'C<flush_cache> function, and to Jenda Krynicky for being a light in
	1041	the world.
	1042
	1043	Special thanks to Jarkko Hietaniemi, the 5.8.0 pumpking, for including
	1044	this module in the core and for his patient and helpful guidance
	1045	during the integration process.
	1046
	1047	=cut