This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: Réf. : Re: PATCH proposal for ext/Safe/safe2.t
[perl5.git] / lib / Memoize.pm
CommitLineData
a0cb3900
JH
1# -*- mode: perl; perl-indent-level: 2; -*-
2# Memoize.pm
3#
4# Transparent memoization of idempotent functions
5#
899dc88a 6# Copyright 1998, 1999, 2000, 2001 M-J. Dominus.
a0cb3900
JH
7# You may copy and distribute this program under the
8# same terms as Perl itself. If in doubt,
9# write to mjd-perl-memoize+@plover.com for a license.
10#
3d4a255c 11# Version 0.66 $Revision: 1.18 $ $Date: 2001/06/24 17:16:47 $
a0cb3900
JH
12
13package Memoize;
3d4a255c 14$VERSION = '0.66';
a0cb3900
JH
15
16# Compile-time constants
17sub SCALAR () { 0 }
18sub LIST () { 1 }
19
20
21#
22# Usage memoize(functionname/ref,
23# { NORMALIZER => coderef, INSTALL => name,
24# LIST_CACHE => descriptor, SCALAR_CACHE => descriptor }
25#
26
27use Carp;
28use Exporter;
29use vars qw($DEBUG);
899dc88a 30use Config; # Dammit.
a0cb3900
JH
31@ISA = qw(Exporter);
32@EXPORT = qw(memoize);
33@EXPORT_OK = qw(unmemoize flush_cache);
34use strict;
35
36my %memotable;
37my %revmemotable;
38my @CONTEXT_TAGS = qw(MERGE TIE MEMORY FAULT HASH);
39my %IS_CACHE_TAG = map {($_ => 1)} @CONTEXT_TAGS;
40
41# Raise an error if the user tries to specify one of thesepackage as a
42# tie for LIST_CACHE
43
44my %scalar_only = map {($_ => 1)} qw(DB_File GDBM_File SDBM_File ODBM_File NDBM_File);
45
46sub memoize {
47 my $fn = shift;
48 my %options = @_;
49 my $options = \%options;
50
51 unless (defined($fn) &&
52 (ref $fn eq 'CODE' || ref $fn eq '')) {
53 croak "Usage: memoize 'functionname'|coderef {OPTIONS}";
54 }
55
56 my $uppack = caller; # TCL me Elmo!
57 my $cref; # Code reference to original function
58 my $name = (ref $fn ? undef : $fn);
59
60 # Convert function names to code references
61 $cref = &_make_cref($fn, $uppack);
62
63 # Locate function prototype, if any
64 my $proto = prototype $cref;
65 if (defined $proto) { $proto = "($proto)" }
66 else { $proto = "" }
67
899dc88a
JH
68 # I would like to get rid of the eval, but there seems not to be any
69 # other way to set the prototype properly. The switch here for
70 # 'usethreads' works around a bug in threadperl having to do with
71 # magic goto. It would be better to fix the bug and use the magic
72 # goto version everywhere.
73 my $wrapper =
74 $Config{usethreads}
75 ? eval "sub $proto { &_memoizer(\$cref, \@_); }"
76 : eval "sub $proto { unshift \@_, \$cref; goto &_memoizer; }";
a0cb3900
JH
77
78 my $normalizer = $options{NORMALIZER};
79 if (defined $normalizer && ! ref $normalizer) {
80 $normalizer = _make_cref($normalizer, $uppack);
81 }
82
83 my $install_name;
84 if (defined $options->{INSTALL}) {
85 # INSTALL => name
86 $install_name = $options->{INSTALL};
87 } elsif (! exists $options->{INSTALL}) {
88 # No INSTALL option provided; use original name if possible
89 $install_name = $name;
90 } else {
91 # INSTALL => undef means don't install
92 }
93
94 if (defined $install_name) {
95 $install_name = $uppack . '::' . $install_name
96 unless $install_name =~ /::/;
97 no strict;
98 local($^W) = 0; # ``Subroutine $install_name redefined at ...''
99 *{$install_name} = $wrapper; # Install memoized version
100 }
101
102 $revmemotable{$wrapper} = "" . $cref; # Turn code ref into hash key
103
104 # These will be the caches
105 my %caches;
106 for my $context (qw(SCALAR LIST)) {
107 # suppress subsequent 'uninitialized value' warnings
108 $options{"${context}_CACHE"} ||= '';
109
110 my $cache_opt = $options{"${context}_CACHE"};
111 my @cache_opt_args;
112 if (ref $cache_opt) {
113 @cache_opt_args = @$cache_opt;
114 $cache_opt = shift @cache_opt_args;
115 }
116 if ($cache_opt eq 'FAULT') { # no cache
117 $caches{$context} = undef;
118 } elsif ($cache_opt eq 'HASH') { # user-supplied hash
899dc88a
JH
119 my $cache = $cache_opt_args[0];
120 my $package = ref(tied %$cache);
121 if ($context eq 'LIST' && $scalar_only{$package}) {
122 croak("You can't use $package for LIST_CACHE because it can only store scalars");
123 }
124 $caches{$context} = $cache;
a0cb3900
JH
125 } elsif ($cache_opt eq '' || $IS_CACHE_TAG{$cache_opt}) {
126 # default is that we make up an in-memory hash
127 $caches{$context} = {};
128 # (this might get tied later, or MERGEd away)
129 } else {
130 croak "Unrecognized option to `${context}_CACHE': `$cache_opt' should be one of (@CONTEXT_TAGS); aborting";
131 }
132 }
133
134 # Perhaps I should check here that you didn't supply *both* merge
135 # options. But if you did, it does do something reasonable: They
136 # both get merged to the same in-memory hash.
137 if ($options{SCALAR_CACHE} eq 'MERGE') {
138 $caches{SCALAR} = $caches{LIST};
139 } elsif ($options{LIST_CACHE} eq 'MERGE') {
140 $caches{LIST} = $caches{SCALAR};
141 }
142
143 # Now deal with the TIE options
144 {
145 my $context;
146 foreach $context (qw(SCALAR LIST)) {
147 # If the relevant option wasn't `TIE', this call does nothing.
148 _my_tie($context, $caches{$context}, $options); # Croaks on failure
149 }
150 }
151
152 # We should put some more stuff in here eventually.
153 # We've been saying that for serveral versions now.
154 # And you know what? More stuff keeps going in!
155 $memotable{$cref} =
156 {
157 O => $options, # Short keys here for things we need to access frequently
158 N => $normalizer,
159 U => $cref,
160 MEMOIZED => $wrapper,
161 PACKAGE => $uppack,
162 NAME => $install_name,
163 S => $caches{SCALAR},
164 L => $caches{LIST},
165 };
166
167 $wrapper # Return just memoized version
168}
169
170# This function tries to load a tied hash class and tie the hash to it.
171sub _my_tie {
172 my ($context, $hash, $options) = @_;
173 my $fullopt = $options->{"${context}_CACHE"};
174
175 # We already checked to make sure that this works.
176 my $shortopt = (ref $fullopt) ? $fullopt->[0] : $fullopt;
177
178 return unless defined $shortopt && $shortopt eq 'TIE';
899dc88a
JH
179 carp("TIE option to memoize() is deprecated; use HASH instead") if $^W;
180
a0cb3900
JH
181
182 my @args = ref $fullopt ? @$fullopt : ();
183 shift @args;
184 my $module = shift @args;
185 if ($context eq 'LIST' && $scalar_only{$module}) {
186 croak("You can't use $module for LIST_CACHE because it can only store scalars");
187 }
188 my $modulefile = $module . '.pm';
189 $modulefile =~ s{::}{/}g;
190 eval { require $modulefile };
191 if ($@) {
192 croak "Memoize: Couldn't load hash tie module `$module': $@; aborting";
193 }
a0cb3900
JH
194 my $rc = (tie %$hash => $module, @args);
195 unless ($rc) {
899dc88a 196 croak "Memoize: Couldn't tie hash to `$module': $!; aborting";
a0cb3900
JH
197 }
198 1;
199}
200
201sub flush_cache {
202 my $func = _make_cref($_[0], scalar caller);
203 my $info = $memotable{$revmemotable{$func}};
204 die "$func not memoized" unless defined $info;
205 for my $context (qw(S L)) {
206 my $cache = $info->{$context};
207 if (tied %$cache && ! (tied %$cache)->can('CLEAR')) {
208 my $funcname = defined($info->{NAME}) ?
209 "function $info->{NAME}" : "anonymous function $func";
210 my $context = {S => 'scalar', L => 'list'}->{$context};
211 croak "Tied cache hash for $context-context $funcname does not support flushing";
212 } else {
213 %$cache = ();
214 }
215 }
216}
217
218# This is the function that manages the memo tables.
219sub _memoizer {
220 my $orig = shift; # stringized version of ref to original func.
221 my $info = $memotable{$orig};
222 my $normalizer = $info->{N};
223
224 my $argstr;
225 my $context = (wantarray() ? LIST : SCALAR);
226
227 if (defined $normalizer) {
228 no strict;
229 if ($context == SCALAR) {
230 $argstr = &{$normalizer}(@_);
231 } elsif ($context == LIST) {
232 ($argstr) = &{$normalizer}(@_);
233 } else {
234 croak "Internal error \#41; context was neither LIST nor SCALAR\n";
235 }
236 } else { # Default normalizer
899dc88a
JH
237 local $^W = 0;
238 $argstr = join chr(28),@_;
a0cb3900
JH
239 }
240
241 if ($context == SCALAR) {
242 my $cache = $info->{S};
899dc88a 243 _crap_out($info->{NAME}, 'scalar') unless $cache;
a0cb3900
JH
244 if (exists $cache->{$argstr}) {
245 return $cache->{$argstr};
246 } else {
247 my $val = &{$info->{U}}(@_);
248 # Scalars are considered to be lists; store appropriately
249 if ($info->{O}{SCALAR_CACHE} eq 'MERGE') {
250 $cache->{$argstr} = [$val];
251 } else {
252 $cache->{$argstr} = $val;
253 }
254 $val;
255 }
256 } elsif ($context == LIST) {
257 my $cache = $info->{L};
899dc88a 258 _crap_out($info->{NAME}, 'list') unless $cache;
a0cb3900
JH
259 if (exists $cache->{$argstr}) {
260 my $val = $cache->{$argstr};
a0cb3900 261 # If LISTCONTEXT=>MERGE, then the function never returns lists,
899dc88a 262 # so we have a scalar value cached, so just return it straightaway:
a0cb3900 263 return ($val) if $info->{O}{LIST_CACHE} eq 'MERGE';
899dc88a
JH
264 # Maybe in a later version we can use a faster test.
265
266 # Otherwise, we cached an array containing the returned list:
a0cb3900
JH
267 return @$val;
268 } else {
269 my $q = $cache->{$argstr} = [&{$info->{U}}(@_)];
270 @$q;
271 }
272 } else {
273 croak "Internal error \#42; context was neither LIST nor SCALAR\n";
274 }
275}
276
277sub unmemoize {
278 my $f = shift;
279 my $uppack = caller;
280 my $cref = _make_cref($f, $uppack);
281
282 unless (exists $revmemotable{$cref}) {
283 croak "Could not unmemoize function `$f', because it was not memoized to begin with";
284 }
285
286 my $tabent = $memotable{$revmemotable{$cref}};
287 unless (defined $tabent) {
288 croak "Could not figure out how to unmemoize function `$f'";
289 }
290 my $name = $tabent->{NAME};
291 if (defined $name) {
292 no strict;
293 local($^W) = 0; # ``Subroutine $install_name redefined at ...''
294 *{$name} = $tabent->{U}; # Replace with original function
295 }
296 undef $memotable{$revmemotable{$cref}};
297 undef $revmemotable{$cref};
298
299 # This removes the last reference to the (possibly tied) memo tables
300 # my ($old_function, $memotabs) = @{$tabent}{'U','S','L'};
301 # undef $tabent;
302
303# # Untie the memo tables if they were tied.
304# my $i;
305# for $i (0,1) {
306# if (tied %{$memotabs->[$i]}) {
307# warn "Untying hash #$i\n";
308# untie %{$memotabs->[$i]};
309# }
310# }
311
312 $tabent->{U};
313}
314
315sub _make_cref {
316 my $fn = shift;
317 my $uppack = shift;
318 my $cref;
319 my $name;
320
321 if (ref $fn eq 'CODE') {
322 $cref = $fn;
323 } elsif (! ref $fn) {
324 if ($fn =~ /::/) {
325 $name = $fn;
326 } else {
327 $name = $uppack . '::' . $fn;
328 }
329 no strict;
330 if (defined $name and !defined(&$name)) {
331 croak "Cannot operate on nonexistent function `$fn'";
332 }
333# $cref = \&$name;
334 $cref = *{$name}{CODE};
335 } else {
336 my $parent = (caller(1))[3]; # Function that called _make_cref
337 croak "Usage: argument 1 to `$parent' must be a function name or reference.\n";
338 }
339 $DEBUG and warn "${name}($fn) => $cref in _make_cref\n";
340 $cref;
341}
342
343sub _crap_out {
344 my ($funcname, $context) = @_;
345 if (defined $funcname) {
346 croak "Function `$funcname' called in forbidden $context context; faulting";
347 } else {
348 croak "Anonymous function called in forbidden $context context; faulting";
349 }
350}
351
3521;
353
354
355
356
357
358=head1 NAME
359
360Memoize - Make your functions faster by trading space for time
361
362=head1 SYNOPSIS
363
364 use Memoize;
365 memoize('slow_function');
366 slow_function(arguments); # Is faster than it was before
367
368
369This is normally all you need to know. However, many options are available:
370
371 memoize(function, options...);
372
373Options include:
374
375 NORMALIZER => function
376 INSTALL => new_name
377
378 SCALAR_CACHE => 'MEMORY'
379 SCALAR_CACHE => ['HASH', \%cache_hash ]
380 SCALAR_CACHE => 'FAULT'
381 SCALAR_CACHE => 'MERGE'
382
383 LIST_CACHE => 'MEMORY'
384 LIST_CACHE => ['HASH', \%cache_hash ]
385 LIST_CACHE => 'FAULT'
386 LIST_CACHE => 'MERGE'
387
388=head1 DESCRIPTION
389
390`Memoizing' a function makes it faster by trading space for time. It
391does this by caching the return values of the function in a table.
392If you call the function again with the same arguments, C<memoize>
3d4a255c 393jumps in and gives you the value out of the table, instead of letting
a0cb3900
JH
394the function compute the value all over again.
395
396Here is an extreme example. Consider the Fibonacci sequence, defined
397by the following function:
398
399 # Compute Fibonacci numbers
400 sub fib {
401 my $n = shift;
402 return $n if $n < 2;
403 fib($n-1) + fib($n-2);
404 }
405
406This function is very slow. Why? To compute fib(14), it first wants
407to compute fib(13) and fib(12), and add the results. But to compute
408fib(13), it first has to compute fib(12) and fib(11), and then it
409comes back and computes fib(12) all over again even though the answer
410is the same. And both of the times that it wants to compute fib(12),
411it has to compute fib(11) from scratch, and then it has to do it
412again each time it wants to compute fib(13). This function does so
413much recomputing of old results that it takes a really long time to
414run---fib(14) makes 1,200 extra recursive calls to itself, to compute
415and recompute things that it already computed.
416
417This function is a good candidate for memoization. If you memoize the
418`fib' function above, it will compute fib(14) exactly once, the first
419time it needs to, and then save the result in a table. Then if you
420ask for fib(14) again, it gives you the result out of the table.
421While computing fib(14), instead of computing fib(12) twice, it does
422it once; the second time it needs the value it gets it from the table.
423It doesn't compute fib(11) four times; it computes it once, getting it
424from the table the next three times. Instead of making 1,200
425recursive calls to `fib', it makes 15. This makes the function about
426150 times faster.
427
428You could do the memoization yourself, by rewriting the function, like
429this:
430
431 # Compute Fibonacci numbers, memoized version
432 { my @fib;
433 sub fib {
434 my $n = shift;
435 return $fib[$n] if defined $fib[$n];
436 return $fib[$n] = $n if $n < 2;
437 $fib[$n] = fib($n-1) + fib($n-2);
438 }
439 }
440
441Or you could use this module, like this:
442
443 use Memoize;
444 memoize('fib');
445
446 # Rest of the fib function just like the original version.
447
448This makes it easy to turn memoizing on and off.
449
450Here's an even simpler example: I wrote a simple ray tracer; the
451program would look in a certain direction, figure out what it was
452looking at, and then convert the `color' value (typically a string
453like `red') of that object to a red, green, and blue pixel value, like
454this:
455
456 for ($direction = 0; $direction < 300; $direction++) {
457 # Figure out which object is in direction $direction
458 $color = $object->{color};
459 ($r, $g, $b) = @{&ColorToRGB($color)};
460 ...
461 }
462
463Since there are relatively few objects in a picture, there are only a
464few colors, which get looked up over and over again. Memoizing
465C<ColorToRGB> speeded up the program by several percent.
466
467=head1 DETAILS
468
469This module exports exactly one function, C<memoize>. The rest of the
470functions in this package are None of Your Business.
471
472You should say
473
474 memoize(function)
475
476where C<function> is the name of the function you want to memoize, or
477a reference to it. C<memoize> returns a reference to the new,
478memoized version of the function, or C<undef> on a non-fatal error.
479At present, there are no non-fatal errors, but there might be some in
480the future.
481
482If C<function> was the name of a function, then C<memoize> hides the
483old version and installs the new memoized version under the old name,
484so that C<&function(...)> actually invokes the memoized version.
485
486=head1 OPTIONS
487
488There are some optional options you can pass to C<memoize> to change
489the way it behaves a little. To supply options, invoke C<memoize>
490like this:
491
492 memoize(function, NORMALIZER => function,
493 INSTALL => newname,
494 SCALAR_CACHE => option,
495 LIST_CACHE => option
496 );
497
498Each of these options is optional; you can include some, all, or none
499of them.
500
501=head2 INSTALL
502
503If you supply a function name with C<INSTALL>, memoize will install
504the new, memoized version of the function under the name you give.
505For example,
506
507 memoize('fib', INSTALL => 'fastfib')
508
509installs the memoized version of C<fib> as C<fastfib>; without the
510C<INSTALL> option it would have replaced the old C<fib> with the
511memoized version.
512
513To prevent C<memoize> from installing the memoized version anywhere, use
514C<INSTALL =E<gt> undef>.
515
516=head2 NORMALIZER
517
518Suppose your function looks like this:
519
520 # Typical call: f('aha!', A => 11, B => 12);
521 sub f {
522 my $a = shift;
523 my %hash = @_;
524 $hash{B} ||= 2; # B defaults to 2
525 $hash{C} ||= 7; # C defaults to 7
526
527 # Do something with $a, %hash
528 }
529
530Now, the following calls to your function are all completely equivalent:
531
532 f(OUCH);
533 f(OUCH, B => 2);
534 f(OUCH, C => 7);
535 f(OUCH, B => 2, C => 7);
536 f(OUCH, C => 7, B => 2);
537 (etc.)
538
539However, unless you tell C<Memoize> that these calls are equivalent,
540it will not know that, and it will compute the values for these
541invocations of your function separately, and store them separately.
542
543To prevent this, supply a C<NORMALIZER> function that turns the
544program arguments into a string in a way that equivalent arguments
545turn into the same string. A C<NORMALIZER> function for C<f> above
546might look like this:
547
548 sub normalize_f {
549 my $a = shift;
550 my %hash = @_;
551 $hash{B} ||= 2;
552 $hash{C} ||= 7;
553
3d4a255c 554 join(',', $a, map ($_ => $hash{$_}) sort keys %hash);
a0cb3900
JH
555 }
556
557Each of the argument lists above comes out of the C<normalize_f>
558function looking exactly the same, like this:
559
3d4a255c 560 OUCH,B,2,C,7
a0cb3900
JH
561
562You would tell C<Memoize> to use this normalizer this way:
563
564 memoize('f', NORMALIZER => 'normalize_f');
565
566C<memoize> knows that if the normalized version of the arguments is
567the same for two argument lists, then it can safely look up the value
568that it computed for one argument list and return it as the result of
569calling the function with the other argument list, even if the
570argument lists look different.
571
3d4a255c
JH
572The default normalizer just concatenates the arguments with character
57328 in between. (In ASCII, this is called FS or control-\.) This
574always works correctly for functions with only one string argument,
575and also when the arguments never contain character 28. However, it
576can confuse certain argument lists:
a0cb3900
JH
577
578 normalizer("a\034", "b")
579 normalizer("a", "\034b")
580 normalizer("a\034\034b")
581
3d4a255c 582for example.
a0cb3900 583
899dc88a
JH
584Since hash keys are strings, the default normalizer will not
585distinguish between C<undef> and the empty string. It also won't work
3d4a255c
JH
586when the function's arguments are references. For example, consider a
587function C<g> which gets two arguments: A number, and a reference to
899dc88a 588an array of numbers:
a0cb3900
JH
589
590 g(13, [1,2,3,4,5,6,7]);
591
592The default normalizer will turn this into something like
3d4a255c 593C<"13\034ARRAY(0x436c1f)">. That would be all right, except that a
a0cb3900
JH
594subsequent array of numbers might be stored at a different location
595even though it contains the same data. If this happens, C<Memoize>
596will think that the arguments are different, even though they are
597equivalent. In this case, a normalizer like this is appropriate:
598
599 sub normalize { join ' ', $_[0], @{$_[1]} }
600
601For the example above, this produces the key "13 1 2 3 4 5 6 7".
602
603Another use for normalizers is when the function depends on data other
604than those in its arguments. Suppose you have a function which
605returns a value which depends on the current hour of the day:
606
607 sub on_duty {
608 my ($problem_type) = @_;
609 my $hour = (localtime)[2];
610 open my $fh, "$DIR/$problem_type" or die...;
611 my $line;
612 while ($hour-- > 0)
613 $line = <$fh>;
614 }
615 return $line;
616 }
617
3d4a255c 618At 10:23, this function generates the 10th line of a data file; at
a0cb3900
JH
6193:45 PM it generates the 15th line instead. By default, C<Memoize>
620will only see the $problem_type argument. To fix this, include the
621current hour in the normalizer:
622
623 sub normalize { join ' ', (localtime)[2], @_ }
624
625The calling context of the function (scalar or list context) is
626propagated to the normalizer. This means that if the memoized
627function will treat its arguments differently in list context than it
628would in scalar context, you can have the normalizer function select
629its behavior based on the results of C<wantarray>. Even if called in
630a list context, a normalizer should still return a single string.
631
632=head2 C<SCALAR_CACHE>, C<LIST_CACHE>
633
634Normally, C<Memoize> caches your function's return values into an
635ordinary Perl hash variable. However, you might like to have the
636values cached on the disk, so that they persist from one run of your
637program to the next, or you might like to associate some other
3d4a255c 638interesting semantics with the cached values.
a0cb3900
JH
639
640There's a slight complication under the hood of C<Memoize>: There are
641actually I<two> caches, one for scalar values and one for list values.
642When your function is called in scalar context, its return value is
643cached in one hash, and when your function is called in list context,
644its value is cached in the other hash. You can control the caching
645behavior of both contexts independently with these options.
646
647The argument to C<LIST_CACHE> or C<SCALAR_CACHE> must either be one of
648the following four strings:
649
650 MEMORY
651 FAULT
652 MERGE
3d4a255c 653 HASH
a0cb3900
JH
654
655or else it must be a reference to a list whose first element is one of
656these four strings, such as C<[HASH, arguments...]>.
657
658=over 4
659
660=item C<MEMORY>
661
662C<MEMORY> means that return values from the function will be cached in
663an ordinary Perl hash variable. The hash variable will not persist
664after the program exits. This is the default.
665
666=item C<HASH>
667
668C<HASH> allows you to specify that a particular hash that you supply
669will be used as the cache. You can tie this hash beforehand to give
670it any behavior you want.
671
672A tied hash can have any semantics at all. It is typically tied to an
673on-disk database, so that cached values are stored in the database and
674retrieved from it again when needed, and the disk file typically
675persists after your program has exited. See C<perltie> for more
676complete details about C<tie>.
677
678A typical example is:
679
3d4a255c 680 use DB_File;
a0cb3900
JH
681 tie my %cache => 'DB_File', $filename, O_RDWR|O_CREAT, 0666;
682 memoize 'function', SCALAR_CACHE => [HASH => \%cache];
683
684This has the effect of storing the cache in a C<DB_File> database
685whose name is in C<$filename>. The cache will persist after the
686program has exited. Next time the program runs, it will find the
687cache already populated from the previous run of the program. Or you
688can forcibly populate the cache by constructing a batch program that
689runs in the background and populates the cache file. Then when you
690come to run your real program the memoized function will be fast
691because all its results have been precomputed.
692
693=item C<TIE>
694
695This option is B<strongly deprecated> and will be removed
899dc88a 696in the B<next> release of C<Memoize>. Use the C<HASH> option instead.
a0cb3900 697
3d4a255c 698 memoize ... [TIE, PACKAGE, ARGS...]
a0cb3900
JH
699
700is merely a shortcut for
701
3d4a255c
JH
702 require PACKAGE;
703 tie my %cache, PACKAGE, ARGS...;
a0cb3900
JH
704 memoize ... [HASH => \%cache];
705
a0cb3900
JH
706=item C<FAULT>
707
708C<FAULT> means that you never expect to call the function in scalar
709(or list) context, and that if C<Memoize> detects such a call, it
710should abort the program. The error message is one of
711
712 `foo' function called in forbidden list context at line ...
713 `foo' function called in forbidden scalar context at line ...
714
715=item C<MERGE>
716
717C<MERGE> normally means the function does not distinguish between list
718and sclar context, and that return values in both contexts should be
719stored together. C<LIST_CACHE =E<gt> MERGE> means that list context
720return values should be stored in the same hash that is used for
721scalar context returns, and C<SCALAR_CACHE =E<gt> MERGE> means the
722same, mutatis mutandis. It is an error to specify C<MERGE> for both,
723but it probably does something useful.
724
725Consider this function:
726
727 sub pi { 3; }
728
729Normally, the following code will result in two calls to C<pi>:
730
731 $x = pi();
732 ($y) = pi();
733 $z = pi();
734
735The first call caches the value C<3> in the scalar cache; the second
736caches the list C<(3)> in the list cache. The third call doesn't call
737the real C<pi> function; it gets the value from the scalar cache.
738
739Obviously, the second call to C<pi> is a waste of time, and storing
3d4a255c
JH
740its return value is a waste of space. Specifying C<LIST_CACHE =E<gt>
741MERGE> will make C<memoize> use the same cache for scalar and list
742context return values, so that the second call uses the scalar cache
743that was populated by the first call. C<pi> ends up being called only
744once, and both subsequent calls return C<3> from the cache, regardless
745of the calling context.
a0cb3900
JH
746
747Another use for C<MERGE> is when you want both kinds of return values
748stored in the same disk file; this saves you from having to deal with
749two disk files instead of one. You can use a normalizer function to
750keep the two sets of return values separate. For example:
751
752 tie my %cache => 'MLDBM', 'DB_File', $filename, ...;
753
754 memoize 'myfunc',
755 NORMALIZER => 'n',
756 SCALAR_CACHE => [HASH => \%cache],
757 LIST_CACHE => MERGE,
758 ;
759
760 sub n {
761 my $context = wantarray() ? 'L' : 'S';
762 # ... now compute the hash key from the arguments ...
763 $hashkey = "$context:$hashkey";
764 }
765
766This normalizer function will store scalar context return values in
767the disk file under keys that begin with C<S:>, and list context
768return values under keys that begin with C<L:>.
769
770=back
771
772=head1 OTHER FACILITIES
773
774=head2 C<unmemoize>
775
776There's an C<unmemoize> function that you can import if you want to.
777Why would you want to? Here's an example: Suppose you have your cache
778tied to a DBM file, and you want to make sure that the cache is
779written out to disk if someone interrupts the program. If the program
780exits normally, this will happen anyway, but if someone types
781control-C or something then the program will terminate immediately
782without synchronizing the database. So what you can do instead is
783
784 $SIG{INT} = sub { unmemoize 'function' };
785
a0cb3900
JH
786C<unmemoize> accepts a reference to, or the name of a previously
787memoized function, and undoes whatever it did to provide the memoized
788version in the first place, including making the name refer to the
789unmemoized version if appropriate. It returns a reference to the
790unmemoized version of the function.
791
792If you ask it to unmemoize a function that was never memoized, it
793croaks.
794
795=head2 C<flush_cache>
796
797C<flush_cache(function)> will flush out the caches, discarding I<all>
3d4a255c 798the cached data. The argument may be a function name or a reference
a0cb3900
JH
799to a function. For finer control over when data is discarded or
800expired, see the documentation for C<Memoize::Expire>, included in
801this package.
802
803Note that if the cache is a tied hash, C<flush_cache> will attempt to
804invoke the C<CLEAR> method on the hash. If there is no C<CLEAR>
805method, this will cause a run-time error.
806
807An alternative approach to cache flushing is to use the C<HASH> option
808(see above) to request that C<Memoize> use a particular hash variable
809as its cache. Then you can examine or modify the hash at any time in
3d4a255c 810any way you desire. You may flush the cache by using C<%hash = ()>.
a0cb3900
JH
811
812=head1 CAVEATS
813
814Memoization is not a cure-all:
815
816=over 4
817
818=item *
819
820Do not memoize a function whose behavior depends on program
821state other than its own arguments, such as global variables, the time
822of day, or file input. These functions will not produce correct
823results when memoized. For a particularly easy example:
824
825 sub f {
826 time;
827 }
828
829This function takes no arguments, and as far as C<Memoize> is
830concerned, it always returns the same result. C<Memoize> is wrong, of
831course, and the memoized version of this function will call C<time> once
832to get the current time, and it will return that same time
833every time you call it after that.
834
835=item *
836
837Do not memoize a function with side effects.
838
839 sub f {
840 my ($a, $b) = @_;
841 my $s = $a + $b;
842 print "$a + $b = $s.\n";
843 }
844
845This function accepts two arguments, adds them, and prints their sum.
846Its return value is the numuber of characters it printed, but you
847probably didn't care about that. But C<Memoize> doesn't understand
848that. If you memoize this function, you will get the result you
849expect the first time you ask it to print the sum of 2 and 3, but
850subsequent calls will return 1 (the return value of
851C<print>) without actually printing anything.
852
853=item *
854
855Do not memoize a function that returns a data structure that is
856modified by its caller.
857
858Consider these functions: C<getusers> returns a list of users somehow,
859and then C<main> throws away the first user on the list and prints the
860rest:
861
862 sub main {
863 my $userlist = getusers();
864 shift @$userlist;
865 foreach $u (@$userlist) {
866 print "User $u\n";
867 }
868 }
869
870 sub getusers {
871 my @users;
872 # Do something to get a list of users;
873 \@users; # Return reference to list.
874 }
875
876If you memoize C<getusers> here, it will work right exactly once. The
877reference to the users list will be stored in the memo table. C<main>
878will discard the first element from the referenced list. The next
879time you invoke C<main>, C<Memoize> will not call C<getusers>; it will
880just return the same reference to the same list it got last time. But
881this time the list has already had its head removed; C<main> will
882erroneously remove another element from it. The list will get shorter
883and shorter every time you call C<main>.
884
885Similarly, this:
886
887 $u1 = getusers();
888 $u2 = getusers();
889 pop @$u1;
890
891will modify $u2 as well as $u1, because both variables are references
892to the same array. Had C<getusers> not been memoized, $u1 and $u2
893would have referred to different arrays.
894
895=item *
896
897Do not memoize a very simple function.
898
899Recently someone mentioned to me that the Memoize module made his
900program run slower instead of faster. It turned out that he was
901memoizing the following function:
902
903 sub square {
904 $_[0] * $_[0];
905 }
906
907I pointed out that C<Memoize> uses a hash, and that looking up a
908number in the hash is necessarily going to take a lot longer than a
909single multiplication. There really is no way to speed up the
910C<square> function.
911
912Memoization is not magical.
913
914=back
915
916=head1 PERSISTENT CACHE SUPPORT
917
918You can tie the cache tables to any sort of tied hash that you want
919to, as long as it supports C<TIEHASH>, C<FETCH>, C<STORE>, and
920C<EXISTS>. For example,
921
922 tie my %cache => 'GDBM_File', $filename, O_RDWR|O_CREAT, 0666;
923 memoize 'function', SCALAR_CACHE => [HASH => \%cache];
924
925works just fine. For some storage methods, you need a little glue.
926
927C<SDBM_File> doesn't supply an C<EXISTS> method, so included in this
928package is a glue module called C<Memoize::SDBM_File> which does
929provide one. Use this instead of plain C<SDBM_File> to store your
930cache table on disk in an C<SDBM_File> database:
931
932 tie my %cache => 'Memoize::SDBM_File', $filename, O_RDWR|O_CREAT, 0666;
933 memoize 'function', SCALAR_CACHE => [HASH => \%cache];
934
935C<NDBM_File> has the same problem and the same solution. (Use
899dc88a 936C<Memoize::NDBM_File instead of plain NDBM_File.>)
a0cb3900
JH
937
938C<Storable> isn't a tied hash class at all. You can use it to store a
939hash to disk and retrieve it again, but you can't modify the hash while
940it's on the disk. So if you want to store your cache table in a
941C<Storable> database, use C<Memoize::Storable>, which puts a hashlike
942front-end onto C<Storable>. The hash table is actually kept in
943memory, and is loaded from your C<Storable> file at the time you
944memoize the function, and stored back at the time you unmemoize the
945function (or when your program exits):
946
947 tie my %cache => 'Memoize::Storable', $filename;
948 memoize 'function', SCALAR_CACHE => [HASH => \%cache];
949
950 tie my %cache => 'Memoize::Storable', $filename, 'nstore';
951 memoize 'function', SCALAR_CACHE => [HASH => \%cache];
952
953Include the `nstore' option to have the C<Storable> database written
954in `network order'. (See L<Storable> for more details about this.)
955
3d4a255c
JH
956The C<flush_cache()> function will raise a run-time error unless the
957tied package provides a C<CLEAR> method.
958
a0cb3900
JH
959=head1 EXPIRATION SUPPORT
960
961See Memoize::Expire, which is a plug-in module that adds expiration
962functionality to Memoize. If you don't like the kinds of policies
963that Memoize::Expire implements, it is easy to write your own plug-in
964module to implement whatever policy you desire. Memoize comes with
965several examples. An expiration manager that implements a LRU policy
966is available on CPAN as Memoize::ExpireLRU.
967
968=head1 BUGS
969
970The test suite is much better, but always needs improvement.
971
3d4a255c
JH
972There is some problem with the way C<goto &f> works under threaded
973Perl, perhaps because of the lexical scoping of C<@_>. This is a bug
974in Perl, and until it is resolved, memoized functions will see a
975slightly different C<caller()> and will perform a little more slowly
976on threaded perls than unthreaded perls.
a0cb3900
JH
977
978Here's a bug that isn't my fault: Some versions of C<DB_File> won't
979let you store data under a key of length 0. That means that if you
980have a function C<f> which you memoized and the cache is in a
981C<DB_File> database, then the value of C<f()> (C<f> called with no
982arguments) will not be memoized. Let us all breathe deeply and repeat
3d4a255c
JH
983this mantra: ``Gosh, Keith, that sure was a stupid thing to do.'' If
984this is a big problem, you can write a tied hash class which is a
985front-end to C<DB_File> that prepends <x> to every key before storing
986it.
a0cb3900
JH
987
988=head1 MAILING LIST
989
990To join a very low-traffic mailing list for announcements about
991C<Memoize>, send an empty note to C<mjd-perl-memoize-request@plover.com>.
992
993=head1 AUTHOR
994
995Mark-Jason Dominus (C<mjd-perl-memoize+@plover.com>), Plover Systems co.
996
997See the C<Memoize.pm> Page at http://www.plover.com/~mjd/perl/Memoize/
998for news and upgrades. Near this page, at
999http://www.plover.com/~mjd/perl/MiniMemoize/ there is an article about
1000memoization and about the internals of Memoize that appeared in The
1001Perl Journal, issue #13. (This article is also included in the
1002Memoize distribution as `article.html'.)
1003
3d4a255c
JH
1004My upcoming book will discuss memoization (and many other fascinating
1005topics) in tremendous detail. It will be published by Morgan Kaufmann
1006in 2002, possibly under the title I<Perl Advanced Techniques
1007Handbook>. It will also be available on-line for free. For more
1008information, visit http://perl.plover.com/book/ .
1009
a0cb3900
JH
1010To join a mailing list for announcements about C<Memoize>, send an
1011empty message to C<mjd-perl-memoize-request@plover.com>. This mailing
1012list is for announcements only and has extremely low traffic---about
3d4a255c 1013two messages per year.
a0cb3900 1014
899dc88a
JH
1015=head1 COPYRIGHT AND LICENSE
1016
1017Copyright 1998, 1999, 2000, 2001 by Mark Jason Dominus
1018
1019This library is free software; you may redistribute it and/or modify
3d4a255c 1020it under the same terms as Perl itself.
899dc88a 1021
a0cb3900
JH
1022=head1 THANK YOU
1023
1024Many thanks to Jonathan Roy for bug reports and suggestions, to
1025Michael Schwern for other bug reports and patches, to Mike Cariaso for
1026helping me to figure out the Right Thing to Do About Expiration, to
3d4a255c
JH
1027Joshua Gerth, Joshua Chamas, Jonathan Roy (again), Mark D. Anderson,
1028and Andrew Johnson for more suggestions about expiration, to Brent
1029Powers for the Memoize::ExpireLRU module, to Ariel Scolnicov for
1030delightful messages about the Fibonacci function, to Dion Almaer for
a0cb3900
JH
1031thought-provoking suggestions about the default normalizer, to Walt
1032Mankowski and Kurt Starsinic for much help investigating problems
1033under threaded Perl, to Alex Dudkevich for reporting the bug in
1034prototyped functions and for checking my patch, to Tony Bass for many
3d4a255c
JH
1035helpful suggestions, to Jonathan Roy (again) for finding a use for
1036C<unmemoize()>, to Philippe Verdret for enlightening discussion of
1037C<Hook::PrePostCall>, to Nat Torkington for advice I ignored, to Chris
a0cb3900
JH
1038Nandor for portability advice, to Randal Schwartz for suggesting the
1039'C<flush_cache> function, and to Jenda Krynicky for being a light in
1040the world.
1041
899dc88a
JH
1042Special thanks to Jarkko Hietaniemi, the 5.8.0 pumpking, for including
1043this module in the core and for his patient and helpful guidance
1044during the integration process.
3d4a255c 1045
a0cb3900 1046=cut