perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	package Benchmark;
	2
	3	=head1 NAME
	4
	5	Benchmark - benchmark running times of Perl code
	6
	7	=head1 SYNOPSIS
	8
	9	use Benchmark qw(:all) ;
	10
	11	timethis ($count, "code");
	12
	13	# Use Perl code in strings...
	14	timethese($count, {
	15	'Name1' => '...code1...',
	16	'Name2' => '...code2...',
	17	});
	18
	19	# ... or use subroutine references.
	20	timethese($count, {
	21	'Name1' => sub { ...code1... },
	22	'Name2' => sub { ...code2... },
	23	});
	24
	25	# cmpthese can be used both ways as well
	26	cmpthese($count, {
	27	'Name1' => '...code1...',
	28	'Name2' => '...code2...',
	29	});
	30
	31	cmpthese($count, {
	32	'Name1' => sub { ...code1... },
	33	'Name2' => sub { ...code2... },
	34	});
	35
	36	# ...or in two stages
	37	$results = timethese($count,
	38	{
	39	'Name1' => sub { ...code1... },
	40	'Name2' => sub { ...code2... },
	41	},
	42	'none'
	43	);
	44	cmpthese( $results ) ;
	45
	46	$t = timeit($count, '...other code...');
	47	print "$count loops of other code took:",timestr($t),"\n";
	48
	49	$t = countit($time, '...other code...');
	50	$count = $t->iters ;
	51	print "$count loops of other code took:",timestr($t),"\n";
	52
	53	# enable hires wallclock timing if possible
	54	use Benchmark ':hireswallclock';
	55
	56	=head1 DESCRIPTION
	57
	58	The Benchmark module encapsulates a number of routines to help you
	59	figure out how long it takes to execute some code.
	60
	61	timethis - run a chunk of code several times
	62
	63	timethese - run several chunks of code several times
	64
	65	cmpthese - print results of timethese as a comparison chart
	66
	67	timeit - run a chunk of code and see how long it goes
	68
	69	countit - see how many times a chunk of code runs in a given time
	70
	71
	72	=head2 Methods
	73
	74	=over 10
	75
	76	=item new
	77
	78	Returns the current time. Example:
	79
	80	use Benchmark;
	81	$t0 = new Benchmark;
	82	# ... your code here ...
	83	$t1 = new Benchmark;
	84	$td = timediff($t1, $t0);
	85	print "the code took:",timestr($td),"\n";
	86
	87	=item debug
	88
	89	Enables or disables debugging by setting the C<$Benchmark::debug> flag:
	90
	91	debug Benchmark 1;
	92	$t = timeit(10, ' 5 ** $Global ');
	93	debug Benchmark 0;
	94
	95	=item iters
	96
	97	Returns the number of iterations.
	98
	99	=back
	100
	101	=head2 Standard Exports
	102
	103	The following routines will be exported into your namespace
	104	if you use the Benchmark module:
	105
	106	=over 10
	107
	108	=item timeit(COUNT, CODE)
	109
	110	Arguments: COUNT is the number of times to run the loop, and CODE is
	111	the code to run. CODE may be either a code reference or a string to
	112	be eval'd; either way it will be run in the caller's package.
	113
	114	Returns: a Benchmark object.
	115
	116	=item timethis ( COUNT, CODE, [ TITLE, [ STYLE ]] )
	117
	118	Time COUNT iterations of CODE. CODE may be a string to eval or a
	119	code reference; either way the CODE will run in the caller's package.
	120	Results will be printed to STDOUT as TITLE followed by the times.
	121	TITLE defaults to "timethis COUNT" if none is provided. STYLE
	122	determines the format of the output, as described for timestr() below.
	123
	124	The COUNT can be zero or negative: this means the I<minimum number of
	125	CPU seconds> to run. A zero signifies the default of 3 seconds. For
	126	example to run at least for 10 seconds:
	127
	128	timethis(-10, $code)
	129
	130	or to run two pieces of code for at least 3 seconds each:
	131
	132	timethese(0, { test1 => '...', test2 => '...'})
	133
	134	CPU seconds is, in UNIX terms, the user time plus the system time of
	135	the process itself, as opposed to the real (wallclock) time and the
	136	time spent by the child processes. Less than 0.1 seconds is not
	137	accepted (-0.01 as the count, for example, will cause a fatal runtime
	138	exception).
	139
	140	Note that the CPU seconds is the B<minimum> time: CPU scheduling and
	141	other operating system factors may complicate the attempt so that a
	142	little bit more time is spent. The benchmark output will, however,
	143	also tell the number of C<$code> runs/second, which should be a more
	144	interesting number than the actually spent seconds.
	145
	146	Returns a Benchmark object.
	147
	148	=item timethese ( COUNT, CODEHASHREF, [ STYLE ] )
	149
	150	The CODEHASHREF is a reference to a hash containing names as keys
	151	and either a string to eval or a code reference for each value.
	152	For each (KEY, VALUE) pair in the CODEHASHREF, this routine will
	153	call
	154
	155	timethis(COUNT, VALUE, KEY, STYLE)
	156
	157	The routines are called in string comparison order of KEY.
	158
	159	The COUNT can be zero or negative, see timethis().
	160
	161	Returns a reference to a hash of Benchmark objects, keyed by name.
	162
	163	=item timediff ( T1, T2 )
	164
	165	Returns the difference between two Benchmark times as a Benchmark
	166	object suitable for passing to timestr().
	167
	168	=item timestr ( TIMEDIFF, [ STYLE, [ FORMAT ] ] )
	169
	170	Returns a string that formats the times in the TIMEDIFF object in
	171	the requested STYLE. TIMEDIFF is expected to be a Benchmark object
	172	similar to that returned by timediff().
	173
	174	STYLE can be any of 'all', 'none', 'noc', 'nop' or 'auto'. 'all' shows
	175	each of the 5 times available ('wallclock' time, user time, system time,
	176	user time of children, and system time of children). 'noc' shows all
	177	except the two children times. 'nop' shows only wallclock and the
	178	two children times. 'auto' (the default) will act as 'all' unless
	179	the children times are both zero, in which case it acts as 'noc'.
	180	'none' prevents output.
	181
	182	FORMAT is the L<printf(3)>-style format specifier (without the
	183	leading '%') to use to print the times. It defaults to '5.2f'.
	184
	185	=back
	186
	187	=head2 Optional Exports
	188
	189	The following routines will be exported into your namespace
	190	if you specifically ask that they be imported:
	191
	192	=over 10
	193
	194	=item clearcache ( COUNT )
	195
	196	Clear the cached time for COUNT rounds of the null loop.
	197
	198	=item clearallcache ( )
	199
	200	Clear all cached times.
	201
	202	=item cmpthese ( COUNT, CODEHASHREF, [ STYLE ] )
	203
	204	=item cmpthese ( RESULTSHASHREF, [ STYLE ] )
	205
	206	Optionally calls timethese(), then outputs comparison chart. This:
	207
	208	cmpthese( -1, { a => "++\$i", b => "\$i *= 2" } ) ;
	209
	210	outputs a chart like:
	211
	212	Rate b a
	213	b 2831802/s -- -61%
	214	a 7208959/s 155% --
	215
	216	This chart is sorted from slowest to fastest, and shows the percent speed
	217	difference between each pair of tests.
	218
	219	C<cmpthese> can also be passed the data structure that timethese() returns:
	220
	221	$results = timethese( -1, { a => "++\$i", b => "\$i *= 2" } ) ;
	222	cmpthese( $results );
	223
	224	in case you want to see both sets of results.
	225
	226	Returns a reference to an ARRAY of rows, each row is an ARRAY of cells from the
	227	above chart, including labels. This:
	228
	229	my $rows = cmpthese( -1, { a => '++$i', b => '$i *= 2' }, "none" );
	230
	231	returns a data structure like:
	232
	233	[
	234	[ '', 'Rate', 'b', 'a' ],
	235	[ 'b', '2885232/s', '--', '-59%' ],
	236	[ 'a', '7099126/s', '146%', '--' ],
	237	]
	238
	239	B<NOTE>: This result value differs from previous versions, which returned
	240	the C<timethese()> result structure. If you want that, just use the two
	241	statement C<timethese>...C<cmpthese> idiom shown above.
	242
	243	Incidentally, note the variance in the result values between the two examples;
	244	this is typical of benchmarking. If this were a real benchmark, you would
	245	probably want to run a lot more iterations.
	246
	247	=item countit(TIME, CODE)
	248
	249	Arguments: TIME is the minimum length of time to run CODE for, and CODE is
	250	the code to run. CODE may be either a code reference or a string to
	251	be eval'd; either way it will be run in the caller's package.
	252
	253	TIME is I<not> negative. countit() will run the loop many times to
	254	calculate the speed of CODE before running it for TIME. The actual
	255	time run for will usually be greater than TIME due to system clock
	256	resolution, so it's best to look at the number of iterations divided
	257	by the times that you are concerned with, not just the iterations.
	258
	259	Returns: a Benchmark object.
	260
	261	=item disablecache ( )
	262
	263	Disable caching of timings for the null loop. This will force Benchmark
	264	to recalculate these timings for each new piece of code timed.
	265
	266	=item enablecache ( )
	267
	268	Enable caching of timings for the null loop. The time taken for COUNT
	269	rounds of the null loop will be calculated only once for each
	270	different COUNT used.
	271
	272	=item timesum ( T1, T2 )
	273
	274	Returns the sum of two Benchmark times as a Benchmark object suitable
	275	for passing to timestr().
	276
	277	=back
	278
	279	=head2 :hireswallclock
	280
	281	If the Time::HiRes module has been installed, you can specify the
	282	special tag C<:hireswallclock> for Benchmark (if Time::HiRes is not
	283	available, the tag will be silently ignored). This tag will cause the
	284	wallclock time to be measured in microseconds, instead of integer
	285	seconds. Note though that the speed computations are still conducted
	286	in CPU time, not wallclock time.
	287
	288	=head1 NOTES
	289
	290	The data is stored as a list of values from the time and times
	291	functions:
	292
	293	($real, $user, $system, $children_user, $children_system, $iters)
	294
	295	in seconds for the whole loop (not divided by the number of rounds).
	296
	297	The timing is done using time(3) and times(3).
	298
	299	Code is executed in the caller's package.
	300
	301	The time of the null loop (a loop with the same
	302	number of rounds but empty loop body) is subtracted
	303	from the time of the real loop.
	304
	305	The null loop times can be cached, the key being the
	306	number of rounds. The caching can be controlled using
	307	calls like these:
	308
	309	clearcache($key);
	310	clearallcache();
	311
	312	disablecache();
	313	enablecache();
	314
	315	Caching is off by default, as it can (usually slightly) decrease
	316	accuracy and does not usually noticeably affect runtimes.
	317
	318	=head1 EXAMPLES
	319
	320	For example,
	321
	322	use Benchmark qw( cmpthese ) ;
	323	$x = 3;
	324	cmpthese( -5, {
	325	a => sub{$x*$x},
	326	b => sub{$x**2},
	327	} );
	328
	329	outputs something like this:
	330
	331	Benchmark: running a, b, each for at least 5 CPU seconds...
	332	Rate b a
	333	b 1559428/s -- -62%
	334	a 4152037/s 166% --
	335
	336
	337	while
	338
	339	use Benchmark qw( timethese cmpthese ) ;
	340	$x = 3;
	341	$r = timethese( -5, {
	342	a => sub{$x*$x},
	343	b => sub{$x**2},
	344	} );
	345	cmpthese $r;
	346
	347	outputs something like this:
	348
	349	Benchmark: running a, b, each for at least 5 CPU seconds...
	350	a: 10 wallclock secs ( 5.14 usr + 0.13 sys = 5.27 CPU) @ 3835055.60/s (n=20210743)
	351	b: 5 wallclock secs ( 5.41 usr + 0.00 sys = 5.41 CPU) @ 1574944.92/s (n=8520452)
	352	Rate b a
	353	b 1574945/s -- -59%
	354	a 3835056/s 144% --
	355
	356
	357	=head1 INHERITANCE
	358
	359	Benchmark inherits from no other class, except of course
	360	for Exporter.
	361
	362	=head1 CAVEATS
	363
	364	Comparing eval'd strings with code references will give you
	365	inaccurate results: a code reference will show a slightly slower
	366	execution time than the equivalent eval'd string.
	367
	368	The real time timing is done using time(2) and
	369	the granularity is therefore only one second.
	370
	371	Short tests may produce negative figures because perl
	372	can appear to take longer to execute the empty loop
	373	than a short test; try:
	374
	375	timethis(100,'1');
	376
	377	The system time of the null loop might be slightly
	378	more than the system time of the loop with the actual
	379	code and therefore the difference might end up being E<lt> 0.
	380
	381	=head1 SEE ALSO
	382
	383	L<Devel::DProf> - a Perl code profiler
	384
	385	=head1 AUTHORS
	386
	387	Jarkko Hietaniemi <F<jhi@iki.fi>>, Tim Bunce <F<Tim.Bunce@ig.co.uk>>
	388
	389	=head1 MODIFICATION HISTORY
	390
	391	September 8th, 1994; by Tim Bunce.
	392
	393	March 28th, 1997; by Hugo van der Sanden: added support for code
	394	references and the already documented 'debug' method; revamped
	395	documentation.
	396
	397	April 04-07th, 1997: by Jarkko Hietaniemi, added the run-for-some-time
	398	functionality.
	399
	400	September, 1999; by Barrie Slaymaker: math fixes and accuracy and
	401	efficiency tweaks. Added cmpthese(). A result is now returned from
	402	timethese(). Exposed countit() (was runfor()).
	403
	404	December, 2001; by Nicholas Clark: make timestr() recognise the style 'none'
	405	and return an empty string. If cmpthese is calling timethese, make it pass the
	406	style in. (so that 'none' will suppress output). Make sub new dump its
	407	debugging output to STDERR, to be consistent with everything else.
	408	All bugs found while writing a regression test.
	409
	410	September, 2002; by Jarkko Hietaniemi: add ':hireswallclock' special tag.
	411
	412	=cut
	413
	414	# evaluate something in a clean lexical environment
	415	sub _doeval { eval shift }
	416
	417	#
	418	# put any lexicals at file scope AFTER here
	419	#
	420
	421	use Carp;
	422	use Exporter;
	423	@ISA=(Exporter);
	424	@EXPORT=qw(timeit timethis timethese timediff timestr);
	425	@EXPORT_OK=qw(timesum cmpthese countit
	426	clearcache clearallcache disablecache enablecache);
	427	%EXPORT_TAGS=( all => [ @EXPORT, @EXPORT_OK ] ) ;
	428
	429	$VERSION = 1.0501;
	430
	431	# --- ':hireswallclock' special handling
	432
	433	my $hirestime;
	434
	435	sub mytime () { time }
	436
	437	&init;
	438
	439	sub BEGIN {
	440	if (eval 'require Time::HiRes') {
	441	import Time::HiRes qw(time);
	442	$hirestime = \&Time::HiRes::time;
	443	}
	444	}
	445
	446	sub import {
	447	my $class = shift;
	448	if (grep { $_ eq ":hireswallclock" } @_) {
	449	@_ = grep { $_ ne ":hireswallclock" } @_;
	450	*mytime = $hirestime if defined $hirestime;
	451	}
	452	Benchmark->export_to_level(1, $class, @_);
	453	}
	454
	455	sub init {
	456	$debug = 0;
	457	$min_count = 4;
	458	$min_cpu = 0.4;
	459	$defaultfmt = '5.2f';
	460	$defaultstyle = 'auto';
	461	# The cache can cause a slight loss of sys time accuracy. If a
	462	# user does many tests (>10) with very large counts (>10000)
	463	# or works on a very slow machine the cache may be useful.
	464	&disablecache;
	465	&clearallcache;
	466	}
	467
	468	sub debug { $debug = ($_[1] != 0); }
	469
	470	# The cache needs two branches: 's' for strings and 'c' for code. The
	471	# emtpy loop is different in these two cases.
	472	sub clearcache { delete $cache{"$_[0]c"}; delete $cache{"$_[0]s"}; }
	473	sub clearallcache { %cache = (); }
	474	sub enablecache { $cache = 1; }
	475	sub disablecache { $cache = 0; }
	476
	477	# --- Functions to process the 'time' data type
	478
	479	sub new { my @t = (mytime, times, @_ == 2 ? $_[1] : 0);
	480	print STDERR "new=@t\n" if $debug;
	481	bless \@t; }
	482
	483	sub cpu_p { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps ; }
	484	sub cpu_c { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $cu+$cs ; }
	485	sub cpu_a { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps+$cu+$cs ; }
	486	sub real { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $r ; }
	487	sub iters { $_[0]->[5] ; }
	488
	489	sub timediff {
	490	my($a, $b) = @_;
	491	my @r;
	492	for (my $i=0; $i < @$a; ++$i) {
	493	push(@r, $a->[$i] - $b->[$i]);
	494	}
	495	bless \@r;
	496	}
	497
	498	sub timesum {
	499	my($a, $b) = @_;
	500	my @r;
	501	for (my $i=0; $i < @$a; ++$i) {
	502	push(@r, $a->[$i] + $b->[$i]);
	503	}
	504	bless \@r;
	505	}
	506
	507	sub timestr {
	508	my($tr, $style, $f) = @_;
	509	my @t = @$tr;
	510	warn "bad time value (@t)" unless @t==6;
	511	my($r, $pu, $ps, $cu, $cs, $n) = @t;
	512	my($pt, $ct, $tt) = ($tr->cpu_p, $tr->cpu_c, $tr->cpu_a);
	513	$f = $defaultfmt unless defined $f;
	514	# format a time in the required style, other formats may be added here
	515	$style \|\|= $defaultstyle;
	516	return '' if $style eq 'none';
	517	$style = ($ct>0) ? 'all' : 'noc' if $style eq 'auto';
	518	my $s = "@t $style"; # default for unknown style
	519	my $w = $hirestime ? "%2g" : "%2d";
	520	$s=sprintf("$w wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
	521	$r,$pu,$ps,$cu,$cs,$tt) if $style eq 'all';
	522	$s=sprintf("$w wallclock secs (%$f usr + %$f sys = %$f CPU)",
	523	$r,$pu,$ps,$pt) if $style eq 'noc';
	524	$s=sprintf("$w wallclock secs (%$f cusr + %$f csys = %$f CPU)",
	525	$r,$cu,$cs,$ct) if $style eq 'nop';
	526	$s .= sprintf(" @ %$f/s (n=$n)", $n / ( $pu + $ps )) if $n && $pu+$ps;
	527	$s;
	528	}
	529
	530	sub timedebug {
	531	my($msg, $t) = @_;
	532	print STDERR "$msg",timestr($t),"\n" if $debug;
	533	}
	534
	535	# --- Functions implementing low-level support for timing loops
	536
	537	sub runloop {
	538	my($n, $c) = @_;
	539
	540	$n+=0; # force numeric now, so garbage won't creep into the eval
	541	croak "negative loopcount $n" if $n<0;
	542	confess "Usage: runloop(number, [string \| coderef])" unless defined $c;
	543	my($t0, $t1, $td); # before, after, difference
	544
	545	# find package of caller so we can execute code there
	546	my($curpack) = caller(0);
	547	my($i, $pack)= 0;
	548	while (($pack) = caller(++$i)) {
	549	last if $pack ne $curpack;
	550	}
	551
	552	my ($subcode, $subref);
	553	if (ref $c eq 'CODE') {
	554	$subcode = "sub { for (1 .. $n) { local \$_; package $pack; &\$c; } }";
	555	$subref = eval $subcode;
	556	}
	557	else {
	558	$subcode = "sub { for (1 .. $n) { local \$_; package $pack; $c;} }";
	559	$subref = _doeval($subcode);
	560	}
	561	croak "runloop unable to compile '$c': $@\ncode: $subcode\n" if $@;
	562	print STDERR "runloop $n '$subcode'\n" if $debug;
	563
	564	# Wait for the user timer to tick. This makes the error range more like
	565	# -0.01, +0. If we don't wait, then it's more like -0.01, +0.01. This
	566	# may not seem important, but it significantly reduces the chances of
	567	# getting a too low initial $n in the initial, 'find the minimum' loop
	568	# in &countit. This, in turn, can reduce the number of calls to
	569	# &runloop a lot, and thus reduce additive errors.
	570	my $tbase = Benchmark->new(0)->[1];
	571	while ( ( $t0 = Benchmark->new(0) )->[1] == $tbase ) {} ;
	572	&$subref;
	573	$t1 = Benchmark->new($n);
	574	$td = &timediff($t1, $t0);
	575	timedebug("runloop:",$td);
	576	$td;
	577	}
	578
	579
	580	sub timeit {
	581	my($n, $code) = @_;
	582	my($wn, $wc, $wd);
	583
	584	printf STDERR "timeit $n $code\n" if $debug;
	585	my $cache_key = $n . ( ref( $code ) ? 'c' : 's' );
	586	if ($cache && exists $cache{$cache_key} ) {
	587	$wn = $cache{$cache_key};
	588	} else {
	589	$wn = &runloop($n, ref( $code ) ? sub { } : '' );
	590	# Can't let our baseline have any iterations, or they get subtracted
	591	# out of the result.
	592	$wn->[5] = 0;
	593	$cache{$cache_key} = $wn;
	594	}
	595
	596	$wc = &runloop($n, $code);
	597
	598	$wd = timediff($wc, $wn);
	599	timedebug("timeit: ",$wc);
	600	timedebug(" - ",$wn);
	601	timedebug(" = ",$wd);
	602
	603	$wd;
	604	}
	605
	606
	607	my $default_for = 3;
	608	my $min_for = 0.1;
	609
	610
	611	sub countit {
	612	my ( $tmax, $code ) = @_;
	613
	614	if ( not defined $tmax or $tmax == 0 ) {
	615	$tmax = $default_for;
	616	} elsif ( $tmax < 0 ) {
	617	$tmax = -$tmax;
	618	}
	619
	620	die "countit($tmax, ...): timelimit cannot be less than $min_for.\n"
	621	if $tmax < $min_for;
	622
	623	my ($n, $tc);
	624
	625	# First find the minimum $n that gives a significant timing.
	626	for ($n = 1; ; $n *= 2 ) {
	627	my $td = timeit($n, $code);
	628	$tc = $td->[1] + $td->[2];
	629	last if $tc > 0.1;
	630	}
	631
	632	my $nmin = $n;
	633
	634	# Get $n high enough that we can guess the final $n with some accuracy.
	635	my $tpra = 0.1 * $tmax; # Target/time practice.
	636	while ( $tc < $tpra ) {
	637	# The 5% fudge is to keep us from iterating again all
	638	# that often (this speeds overall responsiveness when $tmax is big
	639	# and we guess a little low). This does not noticably affect
	640	# accuracy since we're not couting these times.
	641	$n = int( $tpra * 1.05 * $n / $tc ); # Linear approximation.
	642	my $td = timeit($n, $code);
	643	my $new_tc = $td->[1] + $td->[2];
	644	# Make sure we are making progress.
	645	$tc = $new_tc > 1.2 * $tc ? $new_tc : 1.2 * $tc;
	646	}
	647
	648	# Now, do the 'for real' timing(s), repeating until we exceed
	649	# the max.
	650	my $ntot = 0;
	651	my $rtot = 0;
	652	my $utot = 0.0;
	653	my $stot = 0.0;
	654	my $cutot = 0.0;
	655	my $cstot = 0.0;
	656	my $ttot = 0.0;
	657
	658	# The 5% fudge is because $n is often a few % low even for routines
	659	# with stable times and avoiding extra timeit()s is nice for
	660	# accuracy's sake.
	661	$n = int( $n * ( 1.05 * $tmax / $tc ) );
	662
	663	while () {
	664	my $td = timeit($n, $code);
	665	$ntot += $n;
	666	$rtot += $td->[0];
	667	$utot += $td->[1];
	668	$stot += $td->[2];
	669	$cutot += $td->[3];
	670	$cstot += $td->[4];
	671	$ttot = $utot + $stot;
	672	last if $ttot >= $tmax;
	673
	674	$ttot = 0.01 if $ttot < 0.01;
	675	my $r = $tmax / $ttot - 1; # Linear approximation.
	676	$n = int( $r * $ntot );
	677	$n = $nmin if $n < $nmin;
	678	}
	679
	680	return bless [ $rtot, $utot, $stot, $cutot, $cstot, $ntot ];
	681	}
	682
	683	# --- Functions implementing high-level time-then-print utilities
	684
	685	sub n_to_for {
	686	my $n = shift;
	687	return $n == 0 ? $default_for : $n < 0 ? -$n : undef;
	688	}
	689
	690	sub timethis{
	691	my($n, $code, $title, $style) = @_;
	692	my($t, $for, $forn);
	693
	694	if ( $n > 0 ) {
	695	croak "non-integer loopcount $n, stopped" if int($n)<$n;
	696	$t = timeit($n, $code);
	697	$title = "timethis $n" unless defined $title;
	698	} else {
	699	$fort = n_to_for( $n );
	700	$t = countit( $fort, $code );
	701	$title = "timethis for $fort" unless defined $title;
	702	$forn = $t->[-1];
	703	}
	704	local $\| = 1;
	705	$style = "" unless defined $style;
	706	printf("%10s: ", $title) unless $style eq 'none';
	707	print timestr($t, $style, $defaultfmt),"\n" unless $style eq 'none';
	708
	709	$n = $forn if defined $forn;
	710
	711	# A conservative warning to spot very silly tests.
	712	# Don't assume that your benchmark is ok simply because
	713	# you don't get this warning!
	714	print " (warning: too few iterations for a reliable count)\n"
	715	if $n < $min_count
	716	\|\| ($t->real < 1 && $n < 1000)
	717	\|\| $t->cpu_a < $min_cpu;
	718	$t;
	719	}
	720
	721	sub timethese{
	722	my($n, $alt, $style) = @_;
	723	die "usage: timethese(count, { 'Name1'=>'code1', ... }\n"
	724	unless ref $alt eq HASH;
	725	my @names = sort keys %$alt;
	726	$style = "" unless defined $style;
	727	print "Benchmark: " unless $style eq 'none';
	728	if ( $n > 0 ) {
	729	croak "non-integer loopcount $n, stopped" if int($n)<$n;
	730	print "timing $n iterations of" unless $style eq 'none';
	731	} else {
	732	print "running" unless $style eq 'none';
	733	}
	734	print " ", join(', ',@names) unless $style eq 'none';
	735	unless ( $n > 0 ) {
	736	my $for = n_to_for( $n );
	737	print ", each" if $n > 1 && $style ne 'none';
	738	print " for at least $for CPU seconds" unless $style eq 'none';
	739	}
	740	print "...\n" unless $style eq 'none';
	741
	742	# we could save the results in an array and produce a summary here
	743	# sum, min, max, avg etc etc
	744	my %results;
	745	foreach my $name (@names) {
	746	$results{$name} = timethis ($n, $alt -> {$name}, $name, $style);
	747	}
	748
	749	return \%results;
	750	}
	751
	752	sub cmpthese{
	753	my ($results, $style) =
	754	ref $_ [0] ? @_
	755	: (timethese (@_ [0, 1], @_ > 2 ? $_ [2] : "none"), $_ [2]);
	756
	757	$style = "" unless defined $style;
	758
	759	# Flatten in to an array of arrays with the name as the first field
	760	my @vals = map{ [ $_, @{$results->{$_}} ] } keys %$results;
	761
	762	for (@vals) {
	763	# The epsilon fudge here is to prevent div by 0. Since clock
	764	# resolutions are much larger, it's below the noise floor.
	765	my $rate = $_->[6] / ( $_->[2] + $_->[3] + 0.000000000000001 );
	766	$_->[7] = $rate;
	767	}
	768
	769	# Sort by rate
	770	@vals = sort { $a->[7] <=> $b->[7] } @vals;
	771
	772	# If more than half of the rates are greater than one...
	773	my $display_as_rate = $vals[$#vals>>1]->[7] > 1;
	774
	775	my @rows;
	776	my @col_widths;
	777
	778	my @top_row = (
	779	'',
	780	$display_as_rate ? 'Rate' : 's/iter',
	781	map { $_->[0] } @vals
	782	);
	783
	784	push @rows, \@top_row;
	785	@col_widths = map { length( $_ ) } @top_row;
	786
	787	# Build the data rows
	788	# We leave the last column in even though it never has any data. Perhaps
	789	# it should go away. Also, perhaps a style for a single column of
	790	# percentages might be nice.
	791	for my $row_val ( @vals ) {
	792	my @row;
	793
	794	# Column 0 = test name
	795	push @row, $row_val->[0];
	796	$col_widths[0] = length( $row_val->[0] )
	797	if length( $row_val->[0] ) > $col_widths[0];
	798
	799	# Column 1 = performance
	800	my $row_rate = $row_val->[7];
	801
	802	# We assume that we'll never get a 0 rate.
	803	my $a = $display_as_rate ? $row_rate : 1 / $row_rate;
	804
	805	# Only give a few decimal places before switching to sci. notation,
	806	# since the results aren't usually that accurate anyway.
	807	my $format =
	808	$a >= 100 ?
	809	"%0.0f" :
	810	$a >= 10 ?
	811	"%0.1f" :
	812	$a >= 1 ?
	813	"%0.2f" :
	814	$a >= 0.1 ?
	815	"%0.3f" :
	816	"%0.2e";
	817
	818	$format .= "/s"
	819	if $display_as_rate;
	820	# Using $b here due to optimizing bug in _58 through _61
	821	my $b = sprintf( $format, $a );
	822	push @row, $b;
	823	$col_widths[1] = length( $b )
	824	if length( $b ) > $col_widths[1];
	825
	826	# Columns 2..N = performance ratios
	827	my $skip_rest = 0;
	828	for ( my $col_num = 0 ; $col_num < @vals ; ++$col_num ) {
	829	my $col_val = $vals[$col_num];
	830	my $out;
	831	if ( $skip_rest ) {
	832	$out = '';
	833	}
	834	elsif ( $col_val->[0] eq $row_val->[0] ) {
	835	$out = "--";
	836	# $skip_rest = 1;
	837	}
	838	else {
	839	my $col_rate = $col_val->[7];
	840	$out = sprintf( "%.0f%%", 100*$row_rate/$col_rate - 100 );
	841	}
	842	push @row, $out;
	843	$col_widths[$col_num+2] = length( $out )
	844	if length( $out ) > $col_widths[$col_num+2];
	845
	846	# A little wierdness to set the first column width properly
	847	$col_widths[$col_num+2] = length( $col_val->[0] )
	848	if length( $col_val->[0] ) > $col_widths[$col_num+2];
	849	}
	850	push @rows, \@row;
	851	}
	852
	853	return \@rows if $style eq "none";
	854
	855	# Equalize column widths in the chart as much as possible without
	856	# exceeding 80 characters. This does not use or affect cols 0 or 1.
	857	my @sorted_width_refs =
	858	sort { $$a <=> $$b } map { \$_ } @col_widths[2..$#col_widths];
	859	my $max_width = ${$sorted_width_refs[-1]};
	860
	861	my $total = @col_widths - 1 ;
	862	for ( @col_widths ) { $total += $_ }
	863
	864	STRETCHER:
	865	while ( $total < 80 ) {
	866	my $min_width = ${$sorted_width_refs[0]};
	867	last
	868	if $min_width == $max_width;
	869	for ( @sorted_width_refs ) {
	870	last
	871	if $$_ > $min_width;
	872	++$$_;
	873	++$total;
	874	last STRETCHER
	875	if $total >= 80;
	876	}
	877	}
	878
	879	# Dump the output
	880	my $format = join( ' ', map { "%${_}s" } @col_widths ) . "\n";
	881	substr( $format, 1, 0 ) = '-';
	882	for ( @rows ) {
	883	printf $format, @$_;
	884	}
	885
	886	return \@rows ;
	887	}
	888
	889
	890	1;