This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Doc patches: assorted minor nits
[perl5.git] / lib / Memoize.pm
CommitLineData
a0cb3900
JH
1# -*- mode: perl; perl-indent-level: 2; -*-
2# Memoize.pm
3#
4# Transparent memoization of idempotent functions
5#
899dc88a 6# Copyright 1998, 1999, 2000, 2001 M-J. Dominus.
a0cb3900
JH
7# You may copy and distribute this program under the
8# same terms as Perl itself. If in doubt,
9# write to mjd-perl-memoize+@plover.com for a license.
10#
5189e6fe 11# Version 1.00 $Revision: 1.18 $ $Date: 2001/06/24 17:16:47 $
a0cb3900
JH
12
13package Memoize;
5189e6fe 14$VERSION = '1.00';
a0cb3900
JH
15
16# Compile-time constants
17sub SCALAR () { 0 }
18sub LIST () { 1 }
19
20
21#
22# Usage memoize(functionname/ref,
23# { NORMALIZER => coderef, INSTALL => name,
24# LIST_CACHE => descriptor, SCALAR_CACHE => descriptor }
25#
26
27use Carp;
28use Exporter;
29use vars qw($DEBUG);
899dc88a 30use Config; # Dammit.
a0cb3900
JH
31@ISA = qw(Exporter);
32@EXPORT = qw(memoize);
33@EXPORT_OK = qw(unmemoize flush_cache);
34use strict;
35
36my %memotable;
37my %revmemotable;
38my @CONTEXT_TAGS = qw(MERGE TIE MEMORY FAULT HASH);
39my %IS_CACHE_TAG = map {($_ => 1)} @CONTEXT_TAGS;
40
41# Raise an error if the user tries to specify one of thesepackage as a
42# tie for LIST_CACHE
43
44my %scalar_only = map {($_ => 1)} qw(DB_File GDBM_File SDBM_File ODBM_File NDBM_File);
45
46sub memoize {
47 my $fn = shift;
48 my %options = @_;
49 my $options = \%options;
50
51 unless (defined($fn) &&
52 (ref $fn eq 'CODE' || ref $fn eq '')) {
53 croak "Usage: memoize 'functionname'|coderef {OPTIONS}";
54 }
55
56 my $uppack = caller; # TCL me Elmo!
57 my $cref; # Code reference to original function
58 my $name = (ref $fn ? undef : $fn);
59
60 # Convert function names to code references
61 $cref = &_make_cref($fn, $uppack);
62
63 # Locate function prototype, if any
64 my $proto = prototype $cref;
65 if (defined $proto) { $proto = "($proto)" }
66 else { $proto = "" }
67
899dc88a
JH
68 # I would like to get rid of the eval, but there seems not to be any
69 # other way to set the prototype properly. The switch here for
70 # 'usethreads' works around a bug in threadperl having to do with
71 # magic goto. It would be better to fix the bug and use the magic
72 # goto version everywhere.
73 my $wrapper =
74 $Config{usethreads}
75 ? eval "sub $proto { &_memoizer(\$cref, \@_); }"
76 : eval "sub $proto { unshift \@_, \$cref; goto &_memoizer; }";
a0cb3900
JH
77
78 my $normalizer = $options{NORMALIZER};
79 if (defined $normalizer && ! ref $normalizer) {
80 $normalizer = _make_cref($normalizer, $uppack);
81 }
82
83 my $install_name;
84 if (defined $options->{INSTALL}) {
85 # INSTALL => name
86 $install_name = $options->{INSTALL};
87 } elsif (! exists $options->{INSTALL}) {
88 # No INSTALL option provided; use original name if possible
89 $install_name = $name;
90 } else {
91 # INSTALL => undef means don't install
92 }
93
94 if (defined $install_name) {
95 $install_name = $uppack . '::' . $install_name
96 unless $install_name =~ /::/;
97 no strict;
98 local($^W) = 0; # ``Subroutine $install_name redefined at ...''
99 *{$install_name} = $wrapper; # Install memoized version
100 }
101
102 $revmemotable{$wrapper} = "" . $cref; # Turn code ref into hash key
103
104 # These will be the caches
105 my %caches;
106 for my $context (qw(SCALAR LIST)) {
107 # suppress subsequent 'uninitialized value' warnings
108 $options{"${context}_CACHE"} ||= '';
109
110 my $cache_opt = $options{"${context}_CACHE"};
111 my @cache_opt_args;
112 if (ref $cache_opt) {
113 @cache_opt_args = @$cache_opt;
114 $cache_opt = shift @cache_opt_args;
115 }
116 if ($cache_opt eq 'FAULT') { # no cache
117 $caches{$context} = undef;
118 } elsif ($cache_opt eq 'HASH') { # user-supplied hash
899dc88a
JH
119 my $cache = $cache_opt_args[0];
120 my $package = ref(tied %$cache);
121 if ($context eq 'LIST' && $scalar_only{$package}) {
122 croak("You can't use $package for LIST_CACHE because it can only store scalars");
123 }
124 $caches{$context} = $cache;
a0cb3900
JH
125 } elsif ($cache_opt eq '' || $IS_CACHE_TAG{$cache_opt}) {
126 # default is that we make up an in-memory hash
127 $caches{$context} = {};
128 # (this might get tied later, or MERGEd away)
129 } else {
130 croak "Unrecognized option to `${context}_CACHE': `$cache_opt' should be one of (@CONTEXT_TAGS); aborting";
131 }
132 }
133
134 # Perhaps I should check here that you didn't supply *both* merge
135 # options. But if you did, it does do something reasonable: They
136 # both get merged to the same in-memory hash.
137 if ($options{SCALAR_CACHE} eq 'MERGE') {
138 $caches{SCALAR} = $caches{LIST};
139 } elsif ($options{LIST_CACHE} eq 'MERGE') {
140 $caches{LIST} = $caches{SCALAR};
141 }
142
143 # Now deal with the TIE options
144 {
145 my $context;
146 foreach $context (qw(SCALAR LIST)) {
147 # If the relevant option wasn't `TIE', this call does nothing.
148 _my_tie($context, $caches{$context}, $options); # Croaks on failure
149 }
150 }
151
152 # We should put some more stuff in here eventually.
153 # We've been saying that for serveral versions now.
154 # And you know what? More stuff keeps going in!
155 $memotable{$cref} =
156 {
157 O => $options, # Short keys here for things we need to access frequently
158 N => $normalizer,
159 U => $cref,
160 MEMOIZED => $wrapper,
161 PACKAGE => $uppack,
162 NAME => $install_name,
163 S => $caches{SCALAR},
164 L => $caches{LIST},
165 };
166
167 $wrapper # Return just memoized version
168}
169
5189e6fe
JH
170use warnings::register;
171
a0cb3900
JH
172# This function tries to load a tied hash class and tie the hash to it.
173sub _my_tie {
174 my ($context, $hash, $options) = @_;
175 my $fullopt = $options->{"${context}_CACHE"};
176
177 # We already checked to make sure that this works.
178 my $shortopt = (ref $fullopt) ? $fullopt->[0] : $fullopt;
179
180 return unless defined $shortopt && $shortopt eq 'TIE';
5189e6fe
JH
181 carp("TIE option to memoize() is deprecated; use HASH instead")
182 if warnings::enabled('deprecated');
a0cb3900
JH
183
184 my @args = ref $fullopt ? @$fullopt : ();
185 shift @args;
186 my $module = shift @args;
187 if ($context eq 'LIST' && $scalar_only{$module}) {
188 croak("You can't use $module for LIST_CACHE because it can only store scalars");
189 }
190 my $modulefile = $module . '.pm';
191 $modulefile =~ s{::}{/}g;
192 eval { require $modulefile };
193 if ($@) {
194 croak "Memoize: Couldn't load hash tie module `$module': $@; aborting";
195 }
a0cb3900
JH
196 my $rc = (tie %$hash => $module, @args);
197 unless ($rc) {
899dc88a 198 croak "Memoize: Couldn't tie hash to `$module': $!; aborting";
a0cb3900
JH
199 }
200 1;
201}
202
203sub flush_cache {
204 my $func = _make_cref($_[0], scalar caller);
205 my $info = $memotable{$revmemotable{$func}};
206 die "$func not memoized" unless defined $info;
207 for my $context (qw(S L)) {
208 my $cache = $info->{$context};
209 if (tied %$cache && ! (tied %$cache)->can('CLEAR')) {
210 my $funcname = defined($info->{NAME}) ?
211 "function $info->{NAME}" : "anonymous function $func";
212 my $context = {S => 'scalar', L => 'list'}->{$context};
213 croak "Tied cache hash for $context-context $funcname does not support flushing";
214 } else {
215 %$cache = ();
216 }
217 }
218}
219
220# This is the function that manages the memo tables.
221sub _memoizer {
222 my $orig = shift; # stringized version of ref to original func.
223 my $info = $memotable{$orig};
224 my $normalizer = $info->{N};
225
226 my $argstr;
227 my $context = (wantarray() ? LIST : SCALAR);
228
229 if (defined $normalizer) {
230 no strict;
231 if ($context == SCALAR) {
232 $argstr = &{$normalizer}(@_);
233 } elsif ($context == LIST) {
234 ($argstr) = &{$normalizer}(@_);
235 } else {
236 croak "Internal error \#41; context was neither LIST nor SCALAR\n";
237 }
238 } else { # Default normalizer
899dc88a
JH
239 local $^W = 0;
240 $argstr = join chr(28),@_;
a0cb3900
JH
241 }
242
243 if ($context == SCALAR) {
244 my $cache = $info->{S};
899dc88a 245 _crap_out($info->{NAME}, 'scalar') unless $cache;
a0cb3900
JH
246 if (exists $cache->{$argstr}) {
247 return $cache->{$argstr};
248 } else {
249 my $val = &{$info->{U}}(@_);
250 # Scalars are considered to be lists; store appropriately
251 if ($info->{O}{SCALAR_CACHE} eq 'MERGE') {
252 $cache->{$argstr} = [$val];
253 } else {
254 $cache->{$argstr} = $val;
255 }
256 $val;
257 }
258 } elsif ($context == LIST) {
259 my $cache = $info->{L};
899dc88a 260 _crap_out($info->{NAME}, 'list') unless $cache;
a0cb3900
JH
261 if (exists $cache->{$argstr}) {
262 my $val = $cache->{$argstr};
a0cb3900 263 # If LISTCONTEXT=>MERGE, then the function never returns lists,
899dc88a 264 # so we have a scalar value cached, so just return it straightaway:
a0cb3900 265 return ($val) if $info->{O}{LIST_CACHE} eq 'MERGE';
899dc88a
JH
266 # Maybe in a later version we can use a faster test.
267
268 # Otherwise, we cached an array containing the returned list:
a0cb3900
JH
269 return @$val;
270 } else {
271 my $q = $cache->{$argstr} = [&{$info->{U}}(@_)];
272 @$q;
273 }
274 } else {
275 croak "Internal error \#42; context was neither LIST nor SCALAR\n";
276 }
277}
278
279sub unmemoize {
280 my $f = shift;
281 my $uppack = caller;
282 my $cref = _make_cref($f, $uppack);
283
284 unless (exists $revmemotable{$cref}) {
285 croak "Could not unmemoize function `$f', because it was not memoized to begin with";
286 }
287
288 my $tabent = $memotable{$revmemotable{$cref}};
289 unless (defined $tabent) {
290 croak "Could not figure out how to unmemoize function `$f'";
291 }
292 my $name = $tabent->{NAME};
293 if (defined $name) {
294 no strict;
295 local($^W) = 0; # ``Subroutine $install_name redefined at ...''
296 *{$name} = $tabent->{U}; # Replace with original function
297 }
298 undef $memotable{$revmemotable{$cref}};
299 undef $revmemotable{$cref};
300
301 # This removes the last reference to the (possibly tied) memo tables
302 # my ($old_function, $memotabs) = @{$tabent}{'U','S','L'};
303 # undef $tabent;
304
305# # Untie the memo tables if they were tied.
306# my $i;
307# for $i (0,1) {
308# if (tied %{$memotabs->[$i]}) {
309# warn "Untying hash #$i\n";
310# untie %{$memotabs->[$i]};
311# }
312# }
313
314 $tabent->{U};
315}
316
317sub _make_cref {
318 my $fn = shift;
319 my $uppack = shift;
320 my $cref;
321 my $name;
322
323 if (ref $fn eq 'CODE') {
324 $cref = $fn;
325 } elsif (! ref $fn) {
326 if ($fn =~ /::/) {
327 $name = $fn;
328 } else {
329 $name = $uppack . '::' . $fn;
330 }
331 no strict;
332 if (defined $name and !defined(&$name)) {
333 croak "Cannot operate on nonexistent function `$fn'";
334 }
335# $cref = \&$name;
336 $cref = *{$name}{CODE};
337 } else {
338 my $parent = (caller(1))[3]; # Function that called _make_cref
339 croak "Usage: argument 1 to `$parent' must be a function name or reference.\n";
340 }
341 $DEBUG and warn "${name}($fn) => $cref in _make_cref\n";
342 $cref;
343}
344
345sub _crap_out {
346 my ($funcname, $context) = @_;
347 if (defined $funcname) {
348 croak "Function `$funcname' called in forbidden $context context; faulting";
349 } else {
350 croak "Anonymous function called in forbidden $context context; faulting";
351 }
352}
353
3541;
355
356
357
358
359
360=head1 NAME
361
5189e6fe 362Memoize - Make functions faster by trading space for time
a0cb3900
JH
363
364=head1 SYNOPSIS
365
5189e6fe 366 # This is the documentation for Memoize 1.00
a0cb3900
JH
367 use Memoize;
368 memoize('slow_function');
369 slow_function(arguments); # Is faster than it was before
370
371
372This is normally all you need to know. However, many options are available:
373
374 memoize(function, options...);
375
376Options include:
377
378 NORMALIZER => function
379 INSTALL => new_name
380
381 SCALAR_CACHE => 'MEMORY'
382 SCALAR_CACHE => ['HASH', \%cache_hash ]
383 SCALAR_CACHE => 'FAULT'
384 SCALAR_CACHE => 'MERGE'
385
386 LIST_CACHE => 'MEMORY'
387 LIST_CACHE => ['HASH', \%cache_hash ]
388 LIST_CACHE => 'FAULT'
389 LIST_CACHE => 'MERGE'
390
391=head1 DESCRIPTION
392
393`Memoizing' a function makes it faster by trading space for time. It
394does this by caching the return values of the function in a table.
395If you call the function again with the same arguments, C<memoize>
3d4a255c 396jumps in and gives you the value out of the table, instead of letting
a0cb3900
JH
397the function compute the value all over again.
398
399Here is an extreme example. Consider the Fibonacci sequence, defined
400by the following function:
401
402 # Compute Fibonacci numbers
403 sub fib {
404 my $n = shift;
405 return $n if $n < 2;
406 fib($n-1) + fib($n-2);
407 }
408
409This function is very slow. Why? To compute fib(14), it first wants
410to compute fib(13) and fib(12), and add the results. But to compute
411fib(13), it first has to compute fib(12) and fib(11), and then it
412comes back and computes fib(12) all over again even though the answer
413is the same. And both of the times that it wants to compute fib(12),
414it has to compute fib(11) from scratch, and then it has to do it
415again each time it wants to compute fib(13). This function does so
416much recomputing of old results that it takes a really long time to
417run---fib(14) makes 1,200 extra recursive calls to itself, to compute
418and recompute things that it already computed.
419
420This function is a good candidate for memoization. If you memoize the
421`fib' function above, it will compute fib(14) exactly once, the first
422time it needs to, and then save the result in a table. Then if you
423ask for fib(14) again, it gives you the result out of the table.
424While computing fib(14), instead of computing fib(12) twice, it does
425it once; the second time it needs the value it gets it from the table.
426It doesn't compute fib(11) four times; it computes it once, getting it
427from the table the next three times. Instead of making 1,200
428recursive calls to `fib', it makes 15. This makes the function about
429150 times faster.
430
431You could do the memoization yourself, by rewriting the function, like
432this:
433
434 # Compute Fibonacci numbers, memoized version
435 { my @fib;
436 sub fib {
437 my $n = shift;
438 return $fib[$n] if defined $fib[$n];
439 return $fib[$n] = $n if $n < 2;
440 $fib[$n] = fib($n-1) + fib($n-2);
441 }
442 }
443
444Or you could use this module, like this:
445
446 use Memoize;
447 memoize('fib');
448
449 # Rest of the fib function just like the original version.
450
451This makes it easy to turn memoizing on and off.
452
453Here's an even simpler example: I wrote a simple ray tracer; the
454program would look in a certain direction, figure out what it was
455looking at, and then convert the `color' value (typically a string
456like `red') of that object to a red, green, and blue pixel value, like
457this:
458
459 for ($direction = 0; $direction < 300; $direction++) {
460 # Figure out which object is in direction $direction
461 $color = $object->{color};
462 ($r, $g, $b) = @{&ColorToRGB($color)};
463 ...
464 }
465
466Since there are relatively few objects in a picture, there are only a
467few colors, which get looked up over and over again. Memoizing
5189e6fe 468C<ColorToRGB> sped up the program by several percent.
a0cb3900
JH
469
470=head1 DETAILS
471
472This module exports exactly one function, C<memoize>. The rest of the
473functions in this package are None of Your Business.
474
475You should say
476
477 memoize(function)
478
479where C<function> is the name of the function you want to memoize, or
480a reference to it. C<memoize> returns a reference to the new,
481memoized version of the function, or C<undef> on a non-fatal error.
482At present, there are no non-fatal errors, but there might be some in
483the future.
484
485If C<function> was the name of a function, then C<memoize> hides the
486old version and installs the new memoized version under the old name,
487so that C<&function(...)> actually invokes the memoized version.
488
489=head1 OPTIONS
490
491There are some optional options you can pass to C<memoize> to change
492the way it behaves a little. To supply options, invoke C<memoize>
493like this:
494
495 memoize(function, NORMALIZER => function,
496 INSTALL => newname,
497 SCALAR_CACHE => option,
498 LIST_CACHE => option
499 );
500
501Each of these options is optional; you can include some, all, or none
502of them.
503
504=head2 INSTALL
505
506If you supply a function name with C<INSTALL>, memoize will install
507the new, memoized version of the function under the name you give.
508For example,
509
510 memoize('fib', INSTALL => 'fastfib')
511
512installs the memoized version of C<fib> as C<fastfib>; without the
513C<INSTALL> option it would have replaced the old C<fib> with the
514memoized version.
515
516To prevent C<memoize> from installing the memoized version anywhere, use
517C<INSTALL =E<gt> undef>.
518
519=head2 NORMALIZER
520
521Suppose your function looks like this:
522
523 # Typical call: f('aha!', A => 11, B => 12);
524 sub f {
525 my $a = shift;
526 my %hash = @_;
527 $hash{B} ||= 2; # B defaults to 2
528 $hash{C} ||= 7; # C defaults to 7
529
530 # Do something with $a, %hash
531 }
532
533Now, the following calls to your function are all completely equivalent:
534
535 f(OUCH);
536 f(OUCH, B => 2);
537 f(OUCH, C => 7);
538 f(OUCH, B => 2, C => 7);
539 f(OUCH, C => 7, B => 2);
540 (etc.)
541
542However, unless you tell C<Memoize> that these calls are equivalent,
543it will not know that, and it will compute the values for these
544invocations of your function separately, and store them separately.
545
546To prevent this, supply a C<NORMALIZER> function that turns the
547program arguments into a string in a way that equivalent arguments
548turn into the same string. A C<NORMALIZER> function for C<f> above
549might look like this:
550
551 sub normalize_f {
552 my $a = shift;
553 my %hash = @_;
554 $hash{B} ||= 2;
555 $hash{C} ||= 7;
556
3d4a255c 557 join(',', $a, map ($_ => $hash{$_}) sort keys %hash);
a0cb3900
JH
558 }
559
560Each of the argument lists above comes out of the C<normalize_f>
561function looking exactly the same, like this:
562
3d4a255c 563 OUCH,B,2,C,7
a0cb3900
JH
564
565You would tell C<Memoize> to use this normalizer this way:
566
567 memoize('f', NORMALIZER => 'normalize_f');
568
569C<memoize> knows that if the normalized version of the arguments is
570the same for two argument lists, then it can safely look up the value
571that it computed for one argument list and return it as the result of
572calling the function with the other argument list, even if the
573argument lists look different.
574
3d4a255c
JH
575The default normalizer just concatenates the arguments with character
57628 in between. (In ASCII, this is called FS or control-\.) This
577always works correctly for functions with only one string argument,
578and also when the arguments never contain character 28. However, it
579can confuse certain argument lists:
a0cb3900
JH
580
581 normalizer("a\034", "b")
582 normalizer("a", "\034b")
583 normalizer("a\034\034b")
584
3d4a255c 585for example.
a0cb3900 586
899dc88a
JH
587Since hash keys are strings, the default normalizer will not
588distinguish between C<undef> and the empty string. It also won't work
3d4a255c
JH
589when the function's arguments are references. For example, consider a
590function C<g> which gets two arguments: A number, and a reference to
899dc88a 591an array of numbers:
a0cb3900
JH
592
593 g(13, [1,2,3,4,5,6,7]);
594
595The default normalizer will turn this into something like
3d4a255c 596C<"13\034ARRAY(0x436c1f)">. That would be all right, except that a
a0cb3900
JH
597subsequent array of numbers might be stored at a different location
598even though it contains the same data. If this happens, C<Memoize>
599will think that the arguments are different, even though they are
600equivalent. In this case, a normalizer like this is appropriate:
601
602 sub normalize { join ' ', $_[0], @{$_[1]} }
603
604For the example above, this produces the key "13 1 2 3 4 5 6 7".
605
606Another use for normalizers is when the function depends on data other
607than those in its arguments. Suppose you have a function which
608returns a value which depends on the current hour of the day:
609
610 sub on_duty {
611 my ($problem_type) = @_;
612 my $hour = (localtime)[2];
613 open my $fh, "$DIR/$problem_type" or die...;
614 my $line;
615 while ($hour-- > 0)
616 $line = <$fh>;
617 }
618 return $line;
619 }
620
3d4a255c 621At 10:23, this function generates the 10th line of a data file; at
a0cb3900
JH
6223:45 PM it generates the 15th line instead. By default, C<Memoize>
623will only see the $problem_type argument. To fix this, include the
624current hour in the normalizer:
625
626 sub normalize { join ' ', (localtime)[2], @_ }
627
628The calling context of the function (scalar or list context) is
629propagated to the normalizer. This means that if the memoized
630function will treat its arguments differently in list context than it
631would in scalar context, you can have the normalizer function select
632its behavior based on the results of C<wantarray>. Even if called in
633a list context, a normalizer should still return a single string.
634
635=head2 C<SCALAR_CACHE>, C<LIST_CACHE>
636
637Normally, C<Memoize> caches your function's return values into an
638ordinary Perl hash variable. However, you might like to have the
639values cached on the disk, so that they persist from one run of your
640program to the next, or you might like to associate some other
3d4a255c 641interesting semantics with the cached values.
a0cb3900
JH
642
643There's a slight complication under the hood of C<Memoize>: There are
644actually I<two> caches, one for scalar values and one for list values.
645When your function is called in scalar context, its return value is
646cached in one hash, and when your function is called in list context,
647its value is cached in the other hash. You can control the caching
648behavior of both contexts independently with these options.
649
650The argument to C<LIST_CACHE> or C<SCALAR_CACHE> must either be one of
651the following four strings:
652
653 MEMORY
654 FAULT
655 MERGE
3d4a255c 656 HASH
a0cb3900
JH
657
658or else it must be a reference to a list whose first element is one of
659these four strings, such as C<[HASH, arguments...]>.
660
661=over 4
662
663=item C<MEMORY>
664
665C<MEMORY> means that return values from the function will be cached in
666an ordinary Perl hash variable. The hash variable will not persist
667after the program exits. This is the default.
668
669=item C<HASH>
670
671C<HASH> allows you to specify that a particular hash that you supply
672will be used as the cache. You can tie this hash beforehand to give
673it any behavior you want.
674
675A tied hash can have any semantics at all. It is typically tied to an
676on-disk database, so that cached values are stored in the database and
677retrieved from it again when needed, and the disk file typically
678persists after your program has exited. See C<perltie> for more
679complete details about C<tie>.
680
681A typical example is:
682
3d4a255c 683 use DB_File;
a0cb3900
JH
684 tie my %cache => 'DB_File', $filename, O_RDWR|O_CREAT, 0666;
685 memoize 'function', SCALAR_CACHE => [HASH => \%cache];
686
687This has the effect of storing the cache in a C<DB_File> database
688whose name is in C<$filename>. The cache will persist after the
689program has exited. Next time the program runs, it will find the
690cache already populated from the previous run of the program. Or you
691can forcibly populate the cache by constructing a batch program that
692runs in the background and populates the cache file. Then when you
693come to run your real program the memoized function will be fast
694because all its results have been precomputed.
695
696=item C<TIE>
697
5189e6fe
JH
698This option is no longer supported. It is still documented only to
699aid in the debugging of old programs that use it. Old programs should
700be converted to use the C<HASH> option instead.
a0cb3900 701
3d4a255c 702 memoize ... [TIE, PACKAGE, ARGS...]
a0cb3900
JH
703
704is merely a shortcut for
705
3d4a255c 706 require PACKAGE;
5189e6fe
JH
707 { my %cache;
708 tie %cache, PACKAGE, ARGS...;
709 }
a0cb3900
JH
710 memoize ... [HASH => \%cache];
711
a0cb3900
JH
712=item C<FAULT>
713
714C<FAULT> means that you never expect to call the function in scalar
715(or list) context, and that if C<Memoize> detects such a call, it
716should abort the program. The error message is one of
717
718 `foo' function called in forbidden list context at line ...
719 `foo' function called in forbidden scalar context at line ...
720
721=item C<MERGE>
722
723C<MERGE> normally means the function does not distinguish between list
724and sclar context, and that return values in both contexts should be
725stored together. C<LIST_CACHE =E<gt> MERGE> means that list context
726return values should be stored in the same hash that is used for
727scalar context returns, and C<SCALAR_CACHE =E<gt> MERGE> means the
728same, mutatis mutandis. It is an error to specify C<MERGE> for both,
729but it probably does something useful.
730
731Consider this function:
732
733 sub pi { 3; }
734
735Normally, the following code will result in two calls to C<pi>:
736
737 $x = pi();
738 ($y) = pi();
739 $z = pi();
740
741The first call caches the value C<3> in the scalar cache; the second
742caches the list C<(3)> in the list cache. The third call doesn't call
743the real C<pi> function; it gets the value from the scalar cache.
744
745Obviously, the second call to C<pi> is a waste of time, and storing
3d4a255c
JH
746its return value is a waste of space. Specifying C<LIST_CACHE =E<gt>
747MERGE> will make C<memoize> use the same cache for scalar and list
748context return values, so that the second call uses the scalar cache
749that was populated by the first call. C<pi> ends up being called only
750once, and both subsequent calls return C<3> from the cache, regardless
751of the calling context.
a0cb3900
JH
752
753Another use for C<MERGE> is when you want both kinds of return values
754stored in the same disk file; this saves you from having to deal with
755two disk files instead of one. You can use a normalizer function to
756keep the two sets of return values separate. For example:
757
758 tie my %cache => 'MLDBM', 'DB_File', $filename, ...;
759
760 memoize 'myfunc',
761 NORMALIZER => 'n',
762 SCALAR_CACHE => [HASH => \%cache],
763 LIST_CACHE => MERGE,
764 ;
765
766 sub n {
767 my $context = wantarray() ? 'L' : 'S';
768 # ... now compute the hash key from the arguments ...
769 $hashkey = "$context:$hashkey";
770 }
771
772This normalizer function will store scalar context return values in
773the disk file under keys that begin with C<S:>, and list context
774return values under keys that begin with C<L:>.
775
776=back
777
778=head1 OTHER FACILITIES
779
780=head2 C<unmemoize>
781
782There's an C<unmemoize> function that you can import if you want to.
783Why would you want to? Here's an example: Suppose you have your cache
784tied to a DBM file, and you want to make sure that the cache is
785written out to disk if someone interrupts the program. If the program
786exits normally, this will happen anyway, but if someone types
787control-C or something then the program will terminate immediately
788without synchronizing the database. So what you can do instead is
789
790 $SIG{INT} = sub { unmemoize 'function' };
791
a0cb3900
JH
792C<unmemoize> accepts a reference to, or the name of a previously
793memoized function, and undoes whatever it did to provide the memoized
794version in the first place, including making the name refer to the
795unmemoized version if appropriate. It returns a reference to the
796unmemoized version of the function.
797
798If you ask it to unmemoize a function that was never memoized, it
799croaks.
800
801=head2 C<flush_cache>
802
803C<flush_cache(function)> will flush out the caches, discarding I<all>
3d4a255c 804the cached data. The argument may be a function name or a reference
a0cb3900
JH
805to a function. For finer control over when data is discarded or
806expired, see the documentation for C<Memoize::Expire>, included in
807this package.
808
809Note that if the cache is a tied hash, C<flush_cache> will attempt to
810invoke the C<CLEAR> method on the hash. If there is no C<CLEAR>
811method, this will cause a run-time error.
812
813An alternative approach to cache flushing is to use the C<HASH> option
814(see above) to request that C<Memoize> use a particular hash variable
815as its cache. Then you can examine or modify the hash at any time in
3d4a255c 816any way you desire. You may flush the cache by using C<%hash = ()>.
a0cb3900
JH
817
818=head1 CAVEATS
819
820Memoization is not a cure-all:
821
822=over 4
823
824=item *
825
826Do not memoize a function whose behavior depends on program
827state other than its own arguments, such as global variables, the time
828of day, or file input. These functions will not produce correct
829results when memoized. For a particularly easy example:
830
831 sub f {
832 time;
833 }
834
835This function takes no arguments, and as far as C<Memoize> is
836concerned, it always returns the same result. C<Memoize> is wrong, of
837course, and the memoized version of this function will call C<time> once
838to get the current time, and it will return that same time
839every time you call it after that.
840
841=item *
842
843Do not memoize a function with side effects.
844
845 sub f {
846 my ($a, $b) = @_;
847 my $s = $a + $b;
848 print "$a + $b = $s.\n";
849 }
850
851This function accepts two arguments, adds them, and prints their sum.
852Its return value is the numuber of characters it printed, but you
853probably didn't care about that. But C<Memoize> doesn't understand
854that. If you memoize this function, you will get the result you
855expect the first time you ask it to print the sum of 2 and 3, but
856subsequent calls will return 1 (the return value of
857C<print>) without actually printing anything.
858
859=item *
860
861Do not memoize a function that returns a data structure that is
862modified by its caller.
863
864Consider these functions: C<getusers> returns a list of users somehow,
865and then C<main> throws away the first user on the list and prints the
866rest:
867
868 sub main {
869 my $userlist = getusers();
870 shift @$userlist;
871 foreach $u (@$userlist) {
872 print "User $u\n";
873 }
874 }
875
876 sub getusers {
877 my @users;
878 # Do something to get a list of users;
879 \@users; # Return reference to list.
880 }
881
882If you memoize C<getusers> here, it will work right exactly once. The
883reference to the users list will be stored in the memo table. C<main>
884will discard the first element from the referenced list. The next
885time you invoke C<main>, C<Memoize> will not call C<getusers>; it will
886just return the same reference to the same list it got last time. But
887this time the list has already had its head removed; C<main> will
888erroneously remove another element from it. The list will get shorter
889and shorter every time you call C<main>.
890
891Similarly, this:
892
893 $u1 = getusers();
894 $u2 = getusers();
895 pop @$u1;
896
897will modify $u2 as well as $u1, because both variables are references
898to the same array. Had C<getusers> not been memoized, $u1 and $u2
899would have referred to different arrays.
900
901=item *
902
903Do not memoize a very simple function.
904
905Recently someone mentioned to me that the Memoize module made his
906program run slower instead of faster. It turned out that he was
907memoizing the following function:
908
909 sub square {
910 $_[0] * $_[0];
911 }
912
913I pointed out that C<Memoize> uses a hash, and that looking up a
914number in the hash is necessarily going to take a lot longer than a
915single multiplication. There really is no way to speed up the
916C<square> function.
917
918Memoization is not magical.
919
920=back
921
922=head1 PERSISTENT CACHE SUPPORT
923
924You can tie the cache tables to any sort of tied hash that you want
925to, as long as it supports C<TIEHASH>, C<FETCH>, C<STORE>, and
926C<EXISTS>. For example,
927
928 tie my %cache => 'GDBM_File', $filename, O_RDWR|O_CREAT, 0666;
929 memoize 'function', SCALAR_CACHE => [HASH => \%cache];
930
931works just fine. For some storage methods, you need a little glue.
932
933C<SDBM_File> doesn't supply an C<EXISTS> method, so included in this
934package is a glue module called C<Memoize::SDBM_File> which does
935provide one. Use this instead of plain C<SDBM_File> to store your
936cache table on disk in an C<SDBM_File> database:
937
938 tie my %cache => 'Memoize::SDBM_File', $filename, O_RDWR|O_CREAT, 0666;
939 memoize 'function', SCALAR_CACHE => [HASH => \%cache];
940
941C<NDBM_File> has the same problem and the same solution. (Use
899dc88a 942C<Memoize::NDBM_File instead of plain NDBM_File.>)
a0cb3900
JH
943
944C<Storable> isn't a tied hash class at all. You can use it to store a
945hash to disk and retrieve it again, but you can't modify the hash while
946it's on the disk. So if you want to store your cache table in a
947C<Storable> database, use C<Memoize::Storable>, which puts a hashlike
948front-end onto C<Storable>. The hash table is actually kept in
949memory, and is loaded from your C<Storable> file at the time you
950memoize the function, and stored back at the time you unmemoize the
951function (or when your program exits):
952
953 tie my %cache => 'Memoize::Storable', $filename;
954 memoize 'function', SCALAR_CACHE => [HASH => \%cache];
955
956 tie my %cache => 'Memoize::Storable', $filename, 'nstore';
957 memoize 'function', SCALAR_CACHE => [HASH => \%cache];
958
959Include the `nstore' option to have the C<Storable> database written
960in `network order'. (See L<Storable> for more details about this.)
961
3d4a255c
JH
962The C<flush_cache()> function will raise a run-time error unless the
963tied package provides a C<CLEAR> method.
964
a0cb3900
JH
965=head1 EXPIRATION SUPPORT
966
967See Memoize::Expire, which is a plug-in module that adds expiration
968functionality to Memoize. If you don't like the kinds of policies
969that Memoize::Expire implements, it is easy to write your own plug-in
970module to implement whatever policy you desire. Memoize comes with
971several examples. An expiration manager that implements a LRU policy
972is available on CPAN as Memoize::ExpireLRU.
973
974=head1 BUGS
975
976The test suite is much better, but always needs improvement.
977
3d4a255c
JH
978There is some problem with the way C<goto &f> works under threaded
979Perl, perhaps because of the lexical scoping of C<@_>. This is a bug
980in Perl, and until it is resolved, memoized functions will see a
981slightly different C<caller()> and will perform a little more slowly
982on threaded perls than unthreaded perls.
a0cb3900 983
5189e6fe
JH
984Some versions of C<DB_File> won't let you store data under a key of
985length 0. That means that if you have a function C<f> which you
986memoized and the cache is in a C<DB_File> database, then the value of
987C<f()> (C<f> called with no arguments) will not be memoized. If this
988is a big problem, you can supply a normalizer function that prepends
989C<"x"> to every key.
a0cb3900
JH
990
991=head1 MAILING LIST
992
993To join a very low-traffic mailing list for announcements about
994C<Memoize>, send an empty note to C<mjd-perl-memoize-request@plover.com>.
995
996=head1 AUTHOR
997
998Mark-Jason Dominus (C<mjd-perl-memoize+@plover.com>), Plover Systems co.
999
1000See the C<Memoize.pm> Page at http://www.plover.com/~mjd/perl/Memoize/
1001for news and upgrades. Near this page, at
1002http://www.plover.com/~mjd/perl/MiniMemoize/ there is an article about
1003memoization and about the internals of Memoize that appeared in The
1004Perl Journal, issue #13. (This article is also included in the
1005Memoize distribution as `article.html'.)
1006
3d4a255c
JH
1007My upcoming book will discuss memoization (and many other fascinating
1008topics) in tremendous detail. It will be published by Morgan Kaufmann
1009in 2002, possibly under the title I<Perl Advanced Techniques
1010Handbook>. It will also be available on-line for free. For more
1011information, visit http://perl.plover.com/book/ .
1012
a0cb3900
JH
1013To join a mailing list for announcements about C<Memoize>, send an
1014empty message to C<mjd-perl-memoize-request@plover.com>. This mailing
1015list is for announcements only and has extremely low traffic---about
3d4a255c 1016two messages per year.
a0cb3900 1017
899dc88a
JH
1018=head1 COPYRIGHT AND LICENSE
1019
1020Copyright 1998, 1999, 2000, 2001 by Mark Jason Dominus
1021
1022This library is free software; you may redistribute it and/or modify
3d4a255c 1023it under the same terms as Perl itself.
899dc88a 1024
a0cb3900
JH
1025=head1 THANK YOU
1026
1027Many thanks to Jonathan Roy for bug reports and suggestions, to
1028Michael Schwern for other bug reports and patches, to Mike Cariaso for
1029helping me to figure out the Right Thing to Do About Expiration, to
3d4a255c
JH
1030Joshua Gerth, Joshua Chamas, Jonathan Roy (again), Mark D. Anderson,
1031and Andrew Johnson for more suggestions about expiration, to Brent
1032Powers for the Memoize::ExpireLRU module, to Ariel Scolnicov for
1033delightful messages about the Fibonacci function, to Dion Almaer for
a0cb3900
JH
1034thought-provoking suggestions about the default normalizer, to Walt
1035Mankowski and Kurt Starsinic for much help investigating problems
1036under threaded Perl, to Alex Dudkevich for reporting the bug in
1037prototyped functions and for checking my patch, to Tony Bass for many
3d4a255c
JH
1038helpful suggestions, to Jonathan Roy (again) for finding a use for
1039C<unmemoize()>, to Philippe Verdret for enlightening discussion of
1040C<Hook::PrePostCall>, to Nat Torkington for advice I ignored, to Chris
a0cb3900
JH
1041Nandor for portability advice, to Randal Schwartz for suggesting the
1042'C<flush_cache> function, and to Jenda Krynicky for being a light in
1043the world.
1044
899dc88a
JH
1045Special thanks to Jarkko Hietaniemi, the 5.8.0 pumpking, for including
1046this module in the core and for his patient and helpful guidance
1047during the integration process.
3d4a255c 1048
a0cb3900 1049=cut