perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	package Benchmark;
	2
	3	use strict;
	4
	5
	6	=head1 NAME
	7
	8	Benchmark - benchmark running times of Perl code
	9
	10	=head1 SYNOPSIS
	11
	12	use Benchmark qw(:all) ;
	13
	14	timethis ($count, "code");
	15
	16	# Use Perl code in strings...
	17	timethese($count, {
	18	'Name1' => '...code1...',
	19	'Name2' => '...code2...',
	20	});
	21
	22	# ... or use subroutine references.
	23	timethese($count, {
	24	'Name1' => sub { ...code1... },
	25	'Name2' => sub { ...code2... },
	26	});
	27
	28	# cmpthese can be used both ways as well
	29	cmpthese($count, {
	30	'Name1' => '...code1...',
	31	'Name2' => '...code2...',
	32	});
	33
	34	cmpthese($count, {
	35	'Name1' => sub { ...code1... },
	36	'Name2' => sub { ...code2... },
	37	});
	38
	39	# ...or in two stages
	40	$results = timethese($count,
	41	{
	42	'Name1' => sub { ...code1... },
	43	'Name2' => sub { ...code2... },
	44	},
	45	'none'
	46	);
	47	cmpthese( $results ) ;
	48
	49	$t = timeit($count, '...other code...')
	50	print "$count loops of other code took:",timestr($t),"\n";
	51
	52	$t = countit($time, '...other code...')
	53	$count = $t->iters ;
	54	print "$count loops of other code took:",timestr($t),"\n";
	55
	56	# enable hires wallclock timing if possible
	57	use Benchmark ':hireswallclock';
	58
	59	=head1 DESCRIPTION
	60
	61	The Benchmark module encapsulates a number of routines to help you
	62	figure out how long it takes to execute some code.
	63
	64	timethis - run a chunk of code several times
	65
	66	timethese - run several chunks of code several times
	67
	68	cmpthese - print results of timethese as a comparison chart
	69
	70	timeit - run a chunk of code and see how long it goes
	71
	72	countit - see how many times a chunk of code runs in a given time
	73
	74
	75	=head2 Methods
	76
	77	=over 10
	78
	79	=item new
	80
	81	Returns the current time. Example:
	82
	83	use Benchmark;
	84	$t0 = new Benchmark;
	85	# ... your code here ...
	86	$t1 = new Benchmark;
	87	$td = timediff($t1, $t0);
	88	print "the code took:",timestr($td),"\n";
	89
	90	=item debug
	91
	92	Enables or disable debugging by setting the C<$Benchmark::Debug> flag:
	93
	94	debug Benchmark 1;
	95	$t = timeit(10, ' 5 ** $Global ');
	96	debug Benchmark 0;
	97
	98	=item iters
	99
	100	Returns the number of iterations.
	101
	102	=back
	103
	104	=head2 Standard Exports
	105
	106	The following routines will be exported into your namespace
	107	if you use the Benchmark module:
	108
	109	=over 10
	110
	111	=item timeit(COUNT, CODE)
	112
	113	Arguments: COUNT is the number of times to run the loop, and CODE is
	114	the code to run. CODE may be either a code reference or a string to
	115	be eval'd; either way it will be run in the caller's package.
	116
	117	Returns: a Benchmark object.
	118
	119	=item timethis ( COUNT, CODE, [ TITLE, [ STYLE ]] )
	120
	121	Time COUNT iterations of CODE. CODE may be a string to eval or a
	122	code reference; either way the CODE will run in the caller's package.
	123	Results will be printed to STDOUT as TITLE followed by the times.
	124	TITLE defaults to "timethis COUNT" if none is provided. STYLE
	125	determines the format of the output, as described for timestr() below.
	126
	127	The COUNT can be zero or negative: this means the I<minimum number of
	128	CPU seconds> to run. A zero signifies the default of 3 seconds. For
	129	example to run at least for 10 seconds:
	130
	131	timethis(-10, $code)
	132
	133	or to run two pieces of code tests for at least 3 seconds:
	134
	135	timethese(0, { test1 => '...', test2 => '...'})
	136
	137	CPU seconds is, in UNIX terms, the user time plus the system time of
	138	the process itself, as opposed to the real (wallclock) time and the
	139	time spent by the child processes. Less than 0.1 seconds is not
	140	accepted (-0.01 as the count, for example, will cause a fatal runtime
	141	exception).
	142
	143	Note that the CPU seconds is the B<minimum> time: CPU scheduling and
	144	other operating system factors may complicate the attempt so that a
	145	little bit more time is spent. The benchmark output will, however,
	146	also tell the number of C<$code> runs/second, which should be a more
	147	interesting number than the actually spent seconds.
	148
	149	Returns a Benchmark object.
	150
	151	=item timethese ( COUNT, CODEHASHREF, [ STYLE ] )
	152
	153	The CODEHASHREF is a reference to a hash containing names as keys
	154	and either a string to eval or a code reference for each value.
	155	For each (KEY, VALUE) pair in the CODEHASHREF, this routine will
	156	call
	157
	158	timethis(COUNT, VALUE, KEY, STYLE)
	159
	160	The routines are called in string comparison order of KEY.
	161
	162	The COUNT can be zero or negative, see timethis().
	163
	164	Returns a hash reference of Benchmark objects, keyed by name.
	165
	166	=item timediff ( T1, T2 )
	167
	168	Returns the difference between two Benchmark times as a Benchmark
	169	object suitable for passing to timestr().
	170
	171	=item timestr ( TIMEDIFF, [ STYLE, [ FORMAT ] ] )
	172
	173	Returns a string that formats the times in the TIMEDIFF object in
	174	the requested STYLE. TIMEDIFF is expected to be a Benchmark object
	175	similar to that returned by timediff().
	176
	177	STYLE can be any of 'all', 'none', 'noc', 'nop' or 'auto'. 'all' shows
	178	each of the 5 times available ('wallclock' time, user time, system time,
	179	user time of children, and system time of children). 'noc' shows all
	180	except the two children times. 'nop' shows only wallclock and the
	181	two children times. 'auto' (the default) will act as 'all' unless
	182	the children times are both zero, in which case it acts as 'noc'.
	183	'none' prevents output.
	184
	185	FORMAT is the L<printf(3)>-style format specifier (without the
	186	leading '%') to use to print the times. It defaults to '5.2f'.
	187
	188	=back
	189
	190	=head2 Optional Exports
	191
	192	The following routines will be exported into your namespace
	193	if you specifically ask that they be imported:
	194
	195	=over 10
	196
	197	=item clearcache ( COUNT )
	198
	199	Clear the cached time for COUNT rounds of the null loop.
	200
	201	=item clearallcache ( )
	202
	203	Clear all cached times.
	204
	205	=item cmpthese ( COUNT, CODEHASHREF, [ STYLE ] )
	206
	207	=item cmpthese ( RESULTSHASHREF, [ STYLE ] )
	208
	209	Optionally calls timethese(), then outputs comparison chart. This:
	210
	211	cmpthese( -1, { a => "++\$i", b => "\$i *= 2" } ) ;
	212
	213	outputs a chart like:
	214
	215	Rate b a
	216	b 2831802/s -- -61%
	217	a 7208959/s 155% --
	218
	219	This chart is sorted from slowest to fastest, and shows the percent speed
	220	difference between each pair of tests.
	221
	222	c<cmpthese> can also be passed the data structure that timethese() returns:
	223
	224	$results = timethese( -1, { a => "++\$i", b => "\$i *= 2" } ) ;
	225	cmpthese( $results );
	226
	227	in case you want to see both sets of results.
	228	If the first argument is an unblessed hash reference,
	229	that is RESULTSHASHREF; otherwise that is COUNT.
	230
	231	Returns a reference to an ARRAY of rows, each row is an ARRAY of cells from the
	232	above chart, including labels. This:
	233
	234	my $rows = cmpthese( -1, { a => '++$i', b => '$i *= 2' }, "none" );
	235
	236	returns a data structure like:
	237
	238	[
	239	[ '', 'Rate', 'b', 'a' ],
	240	[ 'b', '2885232/s', '--', '-59%' ],
	241	[ 'a', '7099126/s', '146%', '--' ],
	242	]
	243
	244	B<NOTE>: This result value differs from previous versions, which returned
	245	the C<timethese()> result structure. If you want that, just use the two
	246	statement C<timethese>...C<cmpthese> idiom shown above.
	247
	248	Incidently, note the variance in the result values between the two examples;
	249	this is typical of benchmarking. If this were a real benchmark, you would
	250	probably want to run a lot more iterations.
	251
	252	=item countit(TIME, CODE)
	253
	254	Arguments: TIME is the minimum length of time to run CODE for, and CODE is
	255	the code to run. CODE may be either a code reference or a string to
	256	be eval'd; either way it will be run in the caller's package.
	257
	258	TIME is I<not> negative. countit() will run the loop many times to
	259	calculate the speed of CODE before running it for TIME. The actual
	260	time run for will usually be greater than TIME due to system clock
	261	resolution, so it's best to look at the number of iterations divided
	262	by the times that you are concerned with, not just the iterations.
	263
	264	Returns: a Benchmark object.
	265
	266	=item disablecache ( )
	267
	268	Disable caching of timings for the null loop. This will force Benchmark
	269	to recalculate these timings for each new piece of code timed.
	270
	271	=item enablecache ( )
	272
	273	Enable caching of timings for the null loop. The time taken for COUNT
	274	rounds of the null loop will be calculated only once for each
	275	different COUNT used.
	276
	277	=item timesum ( T1, T2 )
	278
	279	Returns the sum of two Benchmark times as a Benchmark object suitable
	280	for passing to timestr().
	281
	282	=back
	283
	284	=head2 :hireswallclock
	285
	286	If the Time::HiRes module has been installed, you can specify the
	287	special tag C<:hireswallclock> for Benchmark (if Time::HiRes is not
	288	available, the tag will be silently ignored). This tag will cause the
	289	wallclock time to be measured in microseconds, instead of integer
	290	seconds. Note though that the speed computations are still conducted
	291	in CPU time, not wallclock time.
	292
	293	=head1 NOTES
	294
	295	The data is stored as a list of values from the time and times
	296	functions:
	297
	298	($real, $user, $system, $children_user, $children_system, $iters)
	299
	300	in seconds for the whole loop (not divided by the number of rounds).
	301
	302	The timing is done using time(3) and times(3).
	303
	304	Code is executed in the caller's package.
	305
	306	The time of the null loop (a loop with the same
	307	number of rounds but empty loop body) is subtracted
	308	from the time of the real loop.
	309
	310	The null loop times can be cached, the key being the
	311	number of rounds. The caching can be controlled using
	312	calls like these:
	313
	314	clearcache($key);
	315	clearallcache();
	316
	317	disablecache();
	318	enablecache();
	319
	320	Caching is off by default, as it can (usually slightly) decrease
	321	accuracy and does not usually noticably affect runtimes.
	322
	323	=head1 EXAMPLES
	324
	325	For example,
	326
	327	use Benchmark qw( cmpthese ) ;
	328	$x = 3;
	329	cmpthese( -5, {
	330	a => sub{$x*$x},
	331	b => sub{$x**2},
	332	} );
	333
	334	outputs something like this:
	335
	336	Benchmark: running a, b, each for at least 5 CPU seconds...
	337	Rate b a
	338	b 1559428/s -- -62%
	339	a 4152037/s 166% --
	340
	341
	342	while
	343
	344	use Benchmark qw( timethese cmpthese ) ;
	345	$x = 3;
	346	$r = timethese( -5, {
	347	a => sub{$x*$x},
	348	b => sub{$x**2},
	349	} );
	350	cmpthese $r;
	351
	352	outputs something like this:
	353
	354	Benchmark: running a, b, each for at least 5 CPU seconds...
	355	a: 10 wallclock secs ( 5.14 usr + 0.13 sys = 5.27 CPU) @ 3835055.60/s (n=20210743)
	356	b: 5 wallclock secs ( 5.41 usr + 0.00 sys = 5.41 CPU) @ 1574944.92/s (n=8520452)
	357	Rate b a
	358	b 1574945/s -- -59%
	359	a 3835056/s 144% --
	360
	361
	362	=head1 INHERITANCE
	363
	364	Benchmark inherits from no other class, except of course
	365	for Exporter.
	366
	367	=head1 CAVEATS
	368
	369	Comparing eval'd strings with code references will give you
	370	inaccurate results: a code reference will show a slightly slower
	371	execution time than the equivalent eval'd string.
	372
	373	The real time timing is done using time(2) and
	374	the granularity is therefore only one second.
	375
	376	Short tests may produce negative figures because perl
	377	can appear to take longer to execute the empty loop
	378	than a short test; try:
	379
	380	timethis(100,'1');
	381
	382	The system time of the null loop might be slightly
	383	more than the system time of the loop with the actual
	384	code and therefore the difference might end up being E<lt> 0.
	385
	386	=head1 SEE ALSO
	387
	388	L<Devel::DProf> - a Perl code profiler
	389
	390	=head1 AUTHORS
	391
	392	Jarkko Hietaniemi <F<jhi@iki.fi>>, Tim Bunce <F<Tim.Bunce@ig.co.uk>>
	393
	394	=head1 MODIFICATION HISTORY
	395
	396	September 8th, 1994; by Tim Bunce.
	397
	398	March 28th, 1997; by Hugo van der Sanden: added support for code
	399	references and the already documented 'debug' method; revamped
	400	documentation.
	401
	402	April 04-07th, 1997: by Jarkko Hietaniemi, added the run-for-some-time
	403	functionality.
	404
	405	September, 1999; by Barrie Slaymaker: math fixes and accuracy and
	406	efficiency tweaks. Added cmpthese(). A result is now returned from
	407	timethese(). Exposed countit() (was runfor()).
	408
	409	December, 2001; by Nicholas Clark: make timestr() recognise the style 'none'
	410	and return an empty string. If cmpthese is calling timethese, make it pass the
	411	style in. (so that 'none' will suppress output). Make sub new dump its
	412	debugging output to STDERR, to be consistent with everything else.
	413	All bugs found while writing a regression test.
	414
	415	September, 2002; by Jarkko Hietaniemi: add ':hireswallclock' special tag.
	416
	417	February, 2004; by Chia-liang Kao: make cmpthese and timestr use time
	418	statistics for children instead of parent when the style is 'nop'.
	419
	420	November, 2007; by Christophe Grosjean: make cmpthese and timestr compute
	421	time consistently with style argument, default is 'all' not 'noc' any more.
	422
	423	=cut
	424
	425	# evaluate something in a clean lexical environment
	426	sub _doeval { no strict; eval shift }
	427
	428	#
	429	# put any lexicals at file scope AFTER here
	430	#
	431
	432	use Carp;
	433	use Exporter;
	434
	435	our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS, $VERSION);
	436
	437	@ISA=qw(Exporter);
	438	@EXPORT=qw(timeit timethis timethese timediff timestr);
	439	@EXPORT_OK=qw(timesum cmpthese countit
	440	clearcache clearallcache disablecache enablecache);
	441	%EXPORT_TAGS=( all => [ @EXPORT, @EXPORT_OK ] ) ;
	442
	443	$VERSION = 1.10;
	444
	445	# --- ':hireswallclock' special handling
	446
	447	my $hirestime;
	448
	449	sub mytime () { time }
	450
	451	init();
	452
	453	sub BEGIN {
	454	if (eval 'require Time::HiRes') {
	455	import Time::HiRes qw(time);
	456	$hirestime = \&Time::HiRes::time;
	457	}
	458	}
	459
	460	sub import {
	461	my $class = shift;
	462	if (grep { $_ eq ":hireswallclock" } @_) {
	463	@_ = grep { $_ ne ":hireswallclock" } @_;
	464	local $^W=0;
	465	*mytime = $hirestime if defined $hirestime;
	466	}
	467	Benchmark->export_to_level(1, $class, @_);
	468	}
	469
	470	our($Debug, $Min_Count, $Min_CPU, $Default_Format, $Default_Style,
	471	%_Usage, %Cache, $Do_Cache);
	472
	473	sub init {
	474	$Debug = 0;
	475	$Min_Count = 4;
	476	$Min_CPU = 0.4;
	477	$Default_Format = '5.2f';
	478	$Default_Style = 'auto';
	479	# The cache can cause a slight loss of sys time accuracy. If a
	480	# user does many tests (>10) with very large counts (>10000)
	481	# or works on a very slow machine the cache may be useful.
	482	disablecache();
	483	clearallcache();
	484	}
	485
	486	sub debug { $Debug = ($_[1] != 0); }
	487
	488	sub usage {
	489	my $calling_sub = (caller(1))[3];
	490	$calling_sub =~ s/^Benchmark:://;
	491	return $_Usage{$calling_sub} \|\| '';
	492	}
	493
	494	# The cache needs two branches: 's' for strings and 'c' for code. The
	495	# empty loop is different in these two cases.
	496
	497	$_Usage{clearcache} = <<'USAGE';
	498	usage: clearcache($count);
	499	USAGE
	500
	501	sub clearcache {
	502	die usage unless @_ == 1;
	503	delete $Cache{"$_[0]c"}; delete $Cache{"$_[0]s"};
	504	}
	505
	506	$_Usage{clearallcache} = <<'USAGE';
	507	usage: clearallcache();
	508	USAGE
	509
	510	sub clearallcache {
	511	die usage if @_;
	512	%Cache = ();
	513	}
	514
	515	$_Usage{enablecache} = <<'USAGE';
	516	usage: enablecache();
	517	USAGE
	518
	519	sub enablecache {
	520	die usage if @_;
	521	$Do_Cache = 1;
	522	}
	523
	524	$_Usage{disablecache} = <<'USAGE';
	525	usage: disablecache();
	526	USAGE
	527
	528	sub disablecache {
	529	die usage if @_;
	530	$Do_Cache = 0;
	531	}
	532
	533
	534	# --- Functions to process the 'time' data type
	535
	536	sub new { my @t = (mytime, times, @_ == 2 ? $_[1] : 0);
	537	print STDERR "new=@t\n" if $Debug;
	538	bless \@t; }
	539
	540	sub cpu_p { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps ; }
	541	sub cpu_c { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $cu+$cs ; }
	542	sub cpu_a { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps+$cu+$cs ; }
	543	sub real { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $r ; }
	544	sub iters { $_[0]->[5] ; }
	545
	546
	547	$_Usage{timediff} = <<'USAGE';
	548	usage: $result_diff = timediff($result1, $result2);
	549	USAGE
	550
	551	sub timediff {
	552	my($a, $b) = @_;
	553
	554	die usage unless ref $a and ref $b;
	555
	556	my @r;
	557	for (my $i=0; $i < @$a; ++$i) {
	558	push(@r, $a->[$i] - $b->[$i]);
	559	}
	560	#die "Bad timediff(): ($r[1] + $r[2]) <= 0 (@$a[1,2]\|@$b[1,2])\n"
	561	# if ($r[1] + $r[2]) < 0;
	562	bless \@r;
	563	}
	564
	565	$_Usage{timesum} = <<'USAGE';
	566	usage: $sum = timesum($result1, $result2);
	567	USAGE
	568
	569	sub timesum {
	570	my($a, $b) = @_;
	571
	572	die usage unless ref $a and ref $b;
	573
	574	my @r;
	575	for (my $i=0; $i < @$a; ++$i) {
	576	push(@r, $a->[$i] + $b->[$i]);
	577	}
	578	bless \@r;
	579	}
	580
	581
	582	$_Usage{timestr} = <<'USAGE';
	583	usage: $formatted_result = timestr($result1);
	584	USAGE
	585
	586	sub timestr {
	587	my($tr, $style, $f) = @_;
	588
	589	die usage unless ref $tr;
	590
	591	my @t = @$tr;
	592	warn "bad time value (@t)" unless @t==6;
	593	my($r, $pu, $ps, $cu, $cs, $n) = @t;
	594	my($pt, $ct, $tt) = ($tr->cpu_p, $tr->cpu_c, $tr->cpu_a);
	595	$f = $Default_Format unless defined $f;
	596	# format a time in the required style, other formats may be added here
	597	$style \|\|= $Default_Style;
	598	return '' if $style eq 'none';
	599	$style = ($ct>0) ? 'all' : 'noc' if $style eq 'auto';
	600	my $s = "@t $style"; # default for unknown style
	601	my $w = $hirestime ? "%2g" : "%2d";
	602	$s = sprintf("$w wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
	603	$r,$pu,$ps,$cu,$cs,$tt) if $style eq 'all';
	604	$s = sprintf("$w wallclock secs (%$f usr + %$f sys = %$f CPU)",
	605	$r,$pu,$ps,$pt) if $style eq 'noc';
	606	$s = sprintf("$w wallclock secs (%$f cusr + %$f csys = %$f CPU)",
	607	$r,$cu,$cs,$ct) if $style eq 'nop';
	608	my $elapsed = do {
	609	if ($style eq 'nop') {$cu+$cs}
	610	elsif ($style eq 'noc') {$pu+$ps}
	611	else {$cu+$cs+$pu+$ps}
	612	};
	613	$s .= sprintf(" @ %$f/s (n=$n)",$n/($elapsed)) if $n && $elapsed;
	614	$s;
	615	}
	616
	617	sub timedebug {
	618	my($msg, $t) = @_;
	619	print STDERR "$msg",timestr($t),"\n" if $Debug;
	620	}
	621
	622	# --- Functions implementing low-level support for timing loops
	623
	624	$_Usage{runloop} = <<'USAGE';
	625	usage: runloop($number, [$string \| $coderef])
	626	USAGE
	627
	628	sub runloop {
	629	my($n, $c) = @_;
	630
	631	$n+=0; # force numeric now, so garbage won't creep into the eval
	632	croak "negative loopcount $n" if $n<0;
	633	confess usage unless defined $c;
	634	my($t0, $t1, $td); # before, after, difference
	635
	636	# find package of caller so we can execute code there
	637	my($curpack) = caller(0);
	638	my($i, $pack)= 0;
	639	while (($pack) = caller(++$i)) {
	640	last if $pack ne $curpack;
	641	}
	642
	643	my ($subcode, $subref);
	644	if (ref $c eq 'CODE') {
	645	$subcode = "sub { for (1 .. $n) { local \$_; package $pack; &\$c; } }";
	646	$subref = eval $subcode;
	647	}
	648	else {
	649	$subcode = "sub { for (1 .. $n) { local \$_; package $pack; $c;} }";
	650	$subref = _doeval($subcode);
	651	}
	652	croak "runloop unable to compile '$c': $@\ncode: $subcode\n" if $@;
	653	print STDERR "runloop $n '$subcode'\n" if $Debug;
	654
	655	# Wait for the user timer to tick. This makes the error range more like
	656	# -0.01, +0. If we don't wait, then it's more like -0.01, +0.01. This
	657	# may not seem important, but it significantly reduces the chances of
	658	# getting a too low initial $n in the initial, 'find the minimum' loop
	659	# in &countit. This, in turn, can reduce the number of calls to
	660	# &runloop a lot, and thus reduce additive errors.
	661	my $tbase = Benchmark->new(0)->[1];
	662	while ( ( $t0 = Benchmark->new(0) )->[1] == $tbase ) {} ;
	663	$subref->();
	664	$t1 = Benchmark->new($n);
	665	$td = &timediff($t1, $t0);
	666	timedebug("runloop:",$td);
	667	$td;
	668	}
	669
	670	$_Usage{timeit} = <<'USAGE';
	671	usage: $result = timeit($count, 'code' ); or
	672	$result = timeit($count, sub { code } );
	673	USAGE
	674
	675	sub timeit {
	676	my($n, $code) = @_;
	677	my($wn, $wc, $wd);
	678
	679	die usage unless defined $code and
	680	(!ref $code or ref $code eq 'CODE');
	681
	682	printf STDERR "timeit $n $code\n" if $Debug;
	683	my $cache_key = $n . ( ref( $code ) ? 'c' : 's' );
	684	if ($Do_Cache && exists $Cache{$cache_key} ) {
	685	$wn = $Cache{$cache_key};
	686	} else {
	687	$wn = &runloop($n, ref( $code ) ? sub { } : '' );
	688	# Can't let our baseline have any iterations, or they get subtracted
	689	# out of the result.
	690	$wn->[5] = 0;
	691	$Cache{$cache_key} = $wn;
	692	}
	693
	694	$wc = &runloop($n, $code);
	695
	696	$wd = timediff($wc, $wn);
	697	timedebug("timeit: ",$wc);
	698	timedebug(" - ",$wn);
	699	timedebug(" = ",$wd);
	700
	701	$wd;
	702	}
	703
	704
	705	my $default_for = 3;
	706	my $min_for = 0.1;
	707
	708
	709	$_Usage{countit} = <<'USAGE';
	710	usage: $result = countit($time, 'code' ); or
	711	$result = countit($time, sub { code } );
	712	USAGE
	713
	714	sub countit {
	715	my ( $tmax, $code ) = @_;
	716
	717	die usage unless @_;
	718
	719	if ( not defined $tmax or $tmax == 0 ) {
	720	$tmax = $default_for;
	721	} elsif ( $tmax < 0 ) {
	722	$tmax = -$tmax;
	723	}
	724
	725	die "countit($tmax, ...): timelimit cannot be less than $min_for.\n"
	726	if $tmax < $min_for;
	727
	728	my ($n, $tc);
	729
	730	# First find the minimum $n that gives a significant timing.
	731	my $zeros=0;
	732	for ($n = 1; ; $n *= 2 ) {
	733	my $td = timeit($n, $code);
	734	$tc = $td->[1] + $td->[2];
	735	if ( $tc <= 0 and $n > 1024 ) {
	736	++$zeros > 16
	737	and die "Timing is consistently zero in estimation loop, cannot benchmark. N=$n\n";
	738	} else {
	739	$zeros = 0;
	740	}
	741	last if $tc > 0.1;
	742	}
	743
	744	my $nmin = $n;
	745
	746	# Get $n high enough that we can guess the final $n with some accuracy.
	747	my $tpra = 0.1 * $tmax; # Target/time practice.
	748	while ( $tc < $tpra ) {
	749	# The 5% fudge is to keep us from iterating again all
	750	# that often (this speeds overall responsiveness when $tmax is big
	751	# and we guess a little low). This does not noticably affect
	752	# accuracy since we're not couting these times.
	753	$n = int( $tpra * 1.05 * $n / $tc ); # Linear approximation.
	754	my $td = timeit($n, $code);
	755	my $new_tc = $td->[1] + $td->[2];
	756	# Make sure we are making progress.
	757	$tc = $new_tc > 1.2 * $tc ? $new_tc : 1.2 * $tc;
	758	}
	759
	760	# Now, do the 'for real' timing(s), repeating until we exceed
	761	# the max.
	762	my $ntot = 0;
	763	my $rtot = 0;
	764	my $utot = 0.0;
	765	my $stot = 0.0;
	766	my $cutot = 0.0;
	767	my $cstot = 0.0;
	768	my $ttot = 0.0;
	769
	770	# The 5% fudge is because $n is often a few % low even for routines
	771	# with stable times and avoiding extra timeit()s is nice for
	772	# accuracy's sake.
	773	$n = int( $n * ( 1.05 * $tmax / $tc ) );
	774	$zeros=0;
	775	while () {
	776	my $td = timeit($n, $code);
	777	$ntot += $n;
	778	$rtot += $td->[0];
	779	$utot += $td->[1];
	780	$stot += $td->[2];
	781	$cutot += $td->[3];
	782	$cstot += $td->[4];
	783	$ttot = $utot + $stot;
	784	last if $ttot >= $tmax;
	785	if ( $ttot <= 0 ) {
	786	++$zeros > 16
	787	and die "Timing is consistently zero, cannot benchmark. N=$n\n";
	788	} else {
	789	$zeros = 0;
	790	}
	791	$ttot = 0.01 if $ttot < 0.01;
	792	my $r = $tmax / $ttot - 1; # Linear approximation.
	793	$n = int( $r * $ntot );
	794	$n = $nmin if $n < $nmin;
	795	}
	796
	797	return bless [ $rtot, $utot, $stot, $cutot, $cstot, $ntot ];
	798	}
	799
	800	# --- Functions implementing high-level time-then-print utilities
	801
	802	sub n_to_for {
	803	my $n = shift;
	804	return $n == 0 ? $default_for : $n < 0 ? -$n : undef;
	805	}
	806
	807	$_Usage{timethis} = <<'USAGE';
	808	usage: $result = timethis($time, 'code' ); or
	809	$result = timethis($time, sub { code } );
	810	USAGE
	811
	812	sub timethis{
	813	my($n, $code, $title, $style) = @_;
	814	my($t, $forn);
	815
	816	die usage unless defined $code and
	817	(!ref $code or ref $code eq 'CODE');
	818
	819	if ( $n > 0 ) {
	820	croak "non-integer loopcount $n, stopped" if int($n)<$n;
	821	$t = timeit($n, $code);
	822	$title = "timethis $n" unless defined $title;
	823	} else {
	824	my $fort = n_to_for( $n );
	825	$t = countit( $fort, $code );
	826	$title = "timethis for $fort" unless defined $title;
	827	$forn = $t->[-1];
	828	}
	829	local $\| = 1;
	830	$style = "" unless defined $style;
	831	printf("%10s: ", $title) unless $style eq 'none';
	832	print timestr($t, $style, $Default_Format),"\n" unless $style eq 'none';
	833
	834	$n = $forn if defined $forn;
	835
	836	# A conservative warning to spot very silly tests.
	837	# Don't assume that your benchmark is ok simply because
	838	# you don't get this warning!
	839	print " (warning: too few iterations for a reliable count)\n"
	840	if $n < $Min_Count
	841	\|\| ($t->real < 1 && $n < 1000)
	842	\|\| $t->cpu_a < $Min_CPU;
	843	$t;
	844	}
	845
	846
	847	$_Usage{timethese} = <<'USAGE';
	848	usage: timethese($count, { Name1 => 'code1', ... }); or
	849	timethese($count, { Name1 => sub { code1 }, ... });
	850	USAGE
	851
	852	sub timethese{
	853	my($n, $alt, $style) = @_;
	854	die usage unless ref $alt eq 'HASH';
	855
	856	my @names = sort keys %$alt;
	857	$style = "" unless defined $style;
	858	print "Benchmark: " unless $style eq 'none';
	859	if ( $n > 0 ) {
	860	croak "non-integer loopcount $n, stopped" if int($n)<$n;
	861	print "timing $n iterations of" unless $style eq 'none';
	862	} else {
	863	print "running" unless $style eq 'none';
	864	}
	865	print " ", join(', ',@names) unless $style eq 'none';
	866	unless ( $n > 0 ) {
	867	my $for = n_to_for( $n );
	868	print ", each" if $n > 1 && $style ne 'none';
	869	print " for at least $for CPU seconds" unless $style eq 'none';
	870	}
	871	print "...\n" unless $style eq 'none';
	872
	873	# we could save the results in an array and produce a summary here
	874	# sum, min, max, avg etc etc
	875	my %results;
	876	foreach my $name (@names) {
	877	$results{$name} = timethis ($n, $alt -> {$name}, $name, $style);
	878	}
	879
	880	return \%results;
	881	}
	882
	883
	884	$_Usage{cmpthese} = <<'USAGE';
	885	usage: cmpthese($count, { Name1 => 'code1', ... }); or
	886	cmpthese($count, { Name1 => sub { code1 }, ... }); or
	887	cmpthese($result, $style);
	888	USAGE
	889
	890	sub cmpthese{
	891	my ($results, $style);
	892
	893	# $count can be a blessed object.
	894	if ( ref $_[0] eq 'HASH' ) {
	895	($results, $style) = @_;
	896	}
	897	else {
	898	my($count, $code) = @_[0,1];
	899	$style = $_[2] if defined $_[2];
	900
	901	die usage unless ref $code eq 'HASH';
	902
	903	$results = timethese($count, $code, ($style \|\| "none"));
	904	}
	905
	906	$style = "" unless defined $style;
	907
	908	# Flatten in to an array of arrays with the name as the first field
	909	my @vals = map{ [ $_, @{$results->{$_}} ] } keys %$results;
	910
	911	for (@vals) {
	912	# The epsilon fudge here is to prevent div by 0. Since clock
	913	# resolutions are much larger, it's below the noise floor.
	914	my $elapsed = do {
	915	if ($style eq 'nop') {$_->[4]+$_->[5]}
	916	elsif ($style eq 'noc') {$_->[2]+$_->[3]}
	917	else {$_->[2]+$_->[3]+$_->[4]+$_->[5]}
	918	};
	919	my $rate = $_->[6]/(($elapsed)+0.000000000000001);
	920	$_->[7] = $rate;
	921	}
	922
	923	# Sort by rate
	924	@vals = sort { $a->[7] <=> $b->[7] } @vals;
	925
	926	# If more than half of the rates are greater than one...
	927	my $display_as_rate = @vals ? ($vals[$#vals>>1]->[7] > 1) : 0;
	928
	929	my @rows;
	930	my @col_widths;
	931
	932	my @top_row = (
	933	'',
	934	$display_as_rate ? 'Rate' : 's/iter',
	935	map { $_->[0] } @vals
	936	);
	937
	938	push @rows, \@top_row;
	939	@col_widths = map { length( $_ ) } @top_row;
	940
	941	# Build the data rows
	942	# We leave the last column in even though it never has any data. Perhaps
	943	# it should go away. Also, perhaps a style for a single column of
	944	# percentages might be nice.
	945	for my $row_val ( @vals ) {
	946	my @row;
	947
	948	# Column 0 = test name
	949	push @row, $row_val->[0];
	950	$col_widths[0] = length( $row_val->[0] )
	951	if length( $row_val->[0] ) > $col_widths[0];
	952
	953	# Column 1 = performance
	954	my $row_rate = $row_val->[7];
	955
	956	# We assume that we'll never get a 0 rate.
	957	my $rate = $display_as_rate ? $row_rate : 1 / $row_rate;
	958
	959	# Only give a few decimal places before switching to sci. notation,
	960	# since the results aren't usually that accurate anyway.
	961	my $format =
	962	$rate >= 100 ?
	963	"%0.0f" :
	964	$rate >= 10 ?
	965	"%0.1f" :
	966	$rate >= 1 ?
	967	"%0.2f" :
	968	$rate >= 0.1 ?
	969	"%0.3f" :
	970	"%0.2e";
	971
	972	$format .= "/s"
	973	if $display_as_rate;
	974
	975	my $formatted_rate = sprintf( $format, $rate );
	976	push @row, $formatted_rate;
	977	$col_widths[1] = length( $formatted_rate )
	978	if length( $formatted_rate ) > $col_widths[1];
	979
	980	# Columns 2..N = performance ratios
	981	my $skip_rest = 0;
	982	for ( my $col_num = 0 ; $col_num < @vals ; ++$col_num ) {
	983	my $col_val = $vals[$col_num];
	984	my $out;
	985	if ( $skip_rest ) {
	986	$out = '';
	987	}
	988	elsif ( $col_val->[0] eq $row_val->[0] ) {
	989	$out = "--";
	990	# $skip_rest = 1;
	991	}
	992	else {
	993	my $col_rate = $col_val->[7];
	994	$out = sprintf( "%.0f%%", 100*$row_rate/$col_rate - 100 );
	995	}
	996	push @row, $out;
	997	$col_widths[$col_num+2] = length( $out )
	998	if length( $out ) > $col_widths[$col_num+2];
	999
	1000	# A little wierdness to set the first column width properly
	1001	$col_widths[$col_num+2] = length( $col_val->[0] )
	1002	if length( $col_val->[0] ) > $col_widths[$col_num+2];
	1003	}
	1004	push @rows, \@row;
	1005	}
	1006
	1007	return \@rows if $style eq "none";
	1008
	1009	# Equalize column widths in the chart as much as possible without
	1010	# exceeding 80 characters. This does not use or affect cols 0 or 1.
	1011	my @sorted_width_refs =
	1012	sort { $$a <=> $$b } map { \$_ } @col_widths[2..$#col_widths];
	1013	my $max_width = ${$sorted_width_refs[-1]};
	1014
	1015	my $total = @col_widths - 1 ;
	1016	for ( @col_widths ) { $total += $_ }
	1017
	1018	STRETCHER:
	1019	while ( $total < 80 ) {
	1020	my $min_width = ${$sorted_width_refs[0]};
	1021	last
	1022	if $min_width == $max_width;
	1023	for ( @sorted_width_refs ) {
	1024	last
	1025	if $$_ > $min_width;
	1026	++$$_;
	1027	++$total;
	1028	last STRETCHER
	1029	if $total >= 80;
	1030	}
	1031	}
	1032
	1033	# Dump the output
	1034	my $format = join( ' ', map { "%${_}s" } @col_widths ) . "\n";
	1035	substr( $format, 1, 0 ) = '-';
	1036	for ( @rows ) {
	1037	printf $format, @$_;
	1038	}
	1039
	1040	return \@rows ;
	1041	}
	1042
	1043
	1044	1;