perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	package Benchmark;
	2
	3	use strict;
	4
	5
	6	=head1 NAME
	7
	8	Benchmark - benchmark running times of Perl code
	9
	10	=head1 SYNOPSIS
	11
	12	use Benchmark qw(:all) ;
	13
	14	timethis ($count, "code");
	15
	16	# Use Perl code in strings...
	17	timethese($count, {
	18	'Name1' => '...code1...',
	19	'Name2' => '...code2...',
	20	});
	21
	22	# ... or use subroutine references.
	23	timethese($count, {
	24	'Name1' => sub { ...code1... },
	25	'Name2' => sub { ...code2... },
	26	});
	27
	28	# cmpthese can be used both ways as well
	29	cmpthese($count, {
	30	'Name1' => '...code1...',
	31	'Name2' => '...code2...',
	32	});
	33
	34	cmpthese($count, {
	35	'Name1' => sub { ...code1... },
	36	'Name2' => sub { ...code2... },
	37	});
	38
	39	# ...or in two stages
	40	$results = timethese($count,
	41	{
	42	'Name1' => sub { ...code1... },
	43	'Name2' => sub { ...code2... },
	44	},
	45	'none'
	46	);
	47	cmpthese( $results ) ;
	48
	49	$t = timeit($count, '...other code...');
	50	print "$count loops of other code took:",timestr($t),"\n";
	51
	52	$t = countit($time, '...other code...');
	53	$count = $t->iters ;
	54	print "$count loops of other code took:",timestr($t),"\n";
	55
	56	# enable hires wallclock timing if possible
	57	use Benchmark ':hireswallclock';
	58
	59	=head1 DESCRIPTION
	60
	61	The Benchmark module encapsulates a number of routines to help you
	62	figure out how long it takes to execute some code.
	63
	64	timethis - run a chunk of code several times
	65
	66	timethese - run several chunks of code several times
	67
	68	cmpthese - print results of timethese as a comparison chart
	69
	70	timeit - run a chunk of code and see how long it goes
	71
	72	countit - see how many times a chunk of code runs in a given time
	73
	74
	75	=head2 Methods
	76
	77	=over 10
	78
	79	=item new
	80
	81	Returns the current time. Example:
	82
	83	use Benchmark;
	84	$t0 = Benchmark->new;
	85	# ... your code here ...
	86	$t1 = Benchmark->new;
	87	$td = timediff($t1, $t0);
	88	print "the code took:",timestr($td),"\n";
	89
	90	=item debug
	91
	92	Enables or disables debugging by setting the C<$Benchmark::Debug> flag:
	93
	94	Benchmark->debug(1);
	95	$t = timeit(10, ' 5 ** $Global ');
	96	Benchmark->debug(0);
	97
	98	=item iters
	99
	100	Returns the number of iterations.
	101
	102	=back
	103
	104	=head2 Standard Exports
	105
	106	The following routines will be exported into your namespace
	107	if you use the Benchmark module:
	108
	109	=over 10
	110
	111	=item timeit(COUNT, CODE)
	112
	113	Arguments: COUNT is the number of times to run the loop, and CODE is
	114	the code to run. CODE may be either a code reference or a string to
	115	be eval'd; either way it will be run in the caller's package.
	116
	117	Returns: a Benchmark object.
	118
	119	=item timethis ( COUNT, CODE, [ TITLE, [ STYLE ]] )
	120
	121	Time COUNT iterations of CODE. CODE may be a string to eval or a
	122	code reference; either way the CODE will run in the caller's package.
	123	Results will be printed to STDOUT as TITLE followed by the times.
	124	TITLE defaults to "timethis COUNT" if none is provided. STYLE
	125	determines the format of the output, as described for timestr() below.
	126
	127	The COUNT can be zero or negative: this means the I<minimum number of
	128	CPU seconds> to run. A zero signifies the default of 3 seconds. For
	129	example to run at least for 10 seconds:
	130
	131	timethis(-10, $code)
	132
	133	or to run two pieces of code for at least 3 seconds each:
	134
	135	timethese(0, { test1 => '...', test2 => '...'})
	136
	137	CPU seconds is, in UNIX terms, the user time plus the system time of
	138	the process itself, as opposed to the real (wallclock) time and the
	139	time spent by the child processes. Less than 0.1 seconds is not
	140	accepted (-0.01 as the count, for example, will cause a fatal runtime
	141	exception).
	142
	143	Note that the CPU seconds is the B<minimum> time: CPU scheduling and
	144	other operating system factors may complicate the attempt so that a
	145	little bit more time is spent. The benchmark output will, however,
	146	also tell the number of C<$code> runs/second, which should be a more
	147	interesting number than the actually spent seconds.
	148
	149	Returns a Benchmark object.
	150
	151	=item timethese ( COUNT, CODEHASHREF, [ STYLE ] )
	152
	153	The CODEHASHREF is a reference to a hash containing names as keys
	154	and either a string to eval or a code reference for each value.
	155	For each (KEY, VALUE) pair in the CODEHASHREF, this routine will
	156	call
	157
	158	timethis(COUNT, VALUE, KEY, STYLE)
	159
	160	The routines are called in string comparison order of KEY.
	161
	162	The COUNT can be zero or negative, see timethis().
	163
	164	Returns a hash reference of Benchmark objects, keyed by name.
	165
	166	=item timediff ( T1, T2 )
	167
	168	Returns the difference between two Benchmark times as a Benchmark
	169	object suitable for passing to timestr().
	170
	171	=item timestr ( TIMEDIFF, [ STYLE, [ FORMAT ] ] )
	172
	173	Returns a string that formats the times in the TIMEDIFF object in
	174	the requested STYLE. TIMEDIFF is expected to be a Benchmark object
	175	similar to that returned by timediff().
	176
	177	STYLE can be any of 'all', 'none', 'noc', 'nop' or 'auto'. 'all' shows
	178	each of the 5 times available ('wallclock' time, user time, system time,
	179	user time of children, and system time of children). 'noc' shows all
	180	except the two children times. 'nop' shows only wallclock and the
	181	two children times. 'auto' (the default) will act as 'all' unless
	182	the children times are both zero, in which case it acts as 'noc'.
	183	'none' prevents output.
	184
	185	FORMAT is the L<printf(3)>-style format specifier (without the
	186	leading '%') to use to print the times. It defaults to '5.2f'.
	187
	188	=back
	189
	190	=head2 Optional Exports
	191
	192	The following routines will be exported into your namespace
	193	if you specifically ask that they be imported:
	194
	195	=over 10
	196
	197	=item clearcache ( COUNT )
	198
	199	Clear the cached time for COUNT rounds of the null loop.
	200
	201	=item clearallcache ( )
	202
	203	Clear all cached times.
	204
	205	=item cmpthese ( COUNT, CODEHASHREF, [ STYLE ] )
	206
	207	=item cmpthese ( RESULTSHASHREF, [ STYLE ] )
	208
	209	Optionally calls timethese(), then outputs a comparison chart. This:
	210
	211	cmpthese( -1, { a => "++\$i", b => "\$i *= 2" } ) ;
	212
	213	outputs a chart like:
	214
	215	Rate b a
	216	b 2831802/s -- -61%
	217	a 7208959/s 155% --
	218
	219	This chart is sorted from slowest to fastest, and shows the percent speed
	220	difference between each pair of tests.
	221
	222	C<cmpthese> can also be passed the data structure that timethese() returns:
	223
	224	$results = timethese( -1,
	225	{ a => "++\$i", b => "\$i *= 2" } ) ;
	226	cmpthese( $results );
	227
	228	in case you want to see both sets of results.
	229	If the first argument is an unblessed hash reference,
	230	that is RESULTSHASHREF; otherwise that is COUNT.
	231
	232	Returns a reference to an ARRAY of rows, each row is an ARRAY of cells from the
	233	above chart, including labels. This:
	234
	235	my $rows = cmpthese( -1,
	236	{ a => '++$i', b => '$i *= 2' }, "none" );
	237
	238	returns a data structure like:
	239
	240	[
	241	[ '', 'Rate', 'b', 'a' ],
	242	[ 'b', '2885232/s', '--', '-59%' ],
	243	[ 'a', '7099126/s', '146%', '--' ],
	244	]
	245
	246	B<NOTE>: This result value differs from previous versions, which returned
	247	the C<timethese()> result structure. If you want that, just use the two
	248	statement C<timethese>...C<cmpthese> idiom shown above.
	249
	250	Incidentally, note the variance in the result values between the two examples;
	251	this is typical of benchmarking. If this were a real benchmark, you would
	252	probably want to run a lot more iterations.
	253
	254	=item countit(TIME, CODE)
	255
	256	Arguments: TIME is the minimum length of time to run CODE for, and CODE is
	257	the code to run. CODE may be either a code reference or a string to
	258	be eval'd; either way it will be run in the caller's package.
	259
	260	TIME is I<not> negative. countit() will run the loop many times to
	261	calculate the speed of CODE before running it for TIME. The actual
	262	time run for will usually be greater than TIME due to system clock
	263	resolution, so it's best to look at the number of iterations divided
	264	by the times that you are concerned with, not just the iterations.
	265
	266	Returns: a Benchmark object.
	267
	268	=item disablecache ( )
	269
	270	Disable caching of timings for the null loop. This will force Benchmark
	271	to recalculate these timings for each new piece of code timed.
	272
	273	=item enablecache ( )
	274
	275	Enable caching of timings for the null loop. The time taken for COUNT
	276	rounds of the null loop will be calculated only once for each
	277	different COUNT used.
	278
	279	=item timesum ( T1, T2 )
	280
	281	Returns the sum of two Benchmark times as a Benchmark object suitable
	282	for passing to timestr().
	283
	284	=back
	285
	286	=head2 :hireswallclock
	287
	288	If the Time::HiRes module has been installed, you can specify the
	289	special tag C<:hireswallclock> for Benchmark (if Time::HiRes is not
	290	available, the tag will be silently ignored). This tag will cause the
	291	wallclock time to be measured in microseconds, instead of integer
	292	seconds. Note though that the speed computations are still conducted
	293	in CPU time, not wallclock time.
	294
	295	=head1 Benchmark Object
	296
	297	Many of the functions in this module return a Benchmark object,
	298	or in the case of C<timethese()>, a reference to a hash, the values of
	299	which are Benchmark objects. This is useful if you want to store or
	300	further process results from Benchmark functions.
	301
	302	Internally the Benchmark object holds timing values,
	303	described in L</"NOTES"> below.
	304	The following methods can be used to access them:
	305
	306	=over 4
	307
	308	=item cpu_p
	309
	310	Total CPU (User + System) of the main (parent) process.
	311
	312	=item cpu_c
	313
	314	Total CPU (User + System) of any children processes.
	315
	316	=item cpu_a
	317
	318	Total CPU of parent and any children processes.
	319
	320	=item real
	321
	322	Real elapsed time "wallclock seconds".
	323
	324	=item iters
	325
	326	Number of iterations run.
	327
	328	=back
	329
	330	The following illustrates use of the Benchmark object:
	331
	332	$result = timethis(100000, sub { ... });
	333	print "total CPU = ", $result->cpu_a, "\n";
	334
	335	=head1 NOTES
	336
	337	The data is stored as a list of values from the time and times
	338	functions:
	339
	340	($real, $user, $system, $children_user, $children_system, $iters)
	341
	342	in seconds for the whole loop (not divided by the number of rounds).
	343
	344	The timing is done using time(3) and times(3).
	345
	346	Code is executed in the caller's package.
	347
	348	The time of the null loop (a loop with the same
	349	number of rounds but empty loop body) is subtracted
	350	from the time of the real loop.
	351
	352	The null loop times can be cached, the key being the
	353	number of rounds. The caching can be controlled using
	354	calls like these:
	355
	356	clearcache($key);
	357	clearallcache();
	358
	359	disablecache();
	360	enablecache();
	361
	362	Caching is off by default, as it can (usually slightly) decrease
	363	accuracy and does not usually noticeably affect runtimes.
	364
	365	=head1 EXAMPLES
	366
	367	For example,
	368
	369	use Benchmark qw( cmpthese ) ;
	370	$x = 3;
	371	cmpthese( -5, {
	372	a => sub{$x*$x},
	373	b => sub{$x**2},
	374	} );
	375
	376	outputs something like this:
	377
	378	Benchmark: running a, b, each for at least 5 CPU seconds...
	379	Rate b a
	380	b 1559428/s -- -62%
	381	a 4152037/s 166% --
	382
	383
	384	while
	385
	386	use Benchmark qw( timethese cmpthese ) ;
	387	$x = 3;
	388	$r = timethese( -5, {
	389	a => sub{$x*$x},
	390	b => sub{$x**2},
	391	} );
	392	cmpthese $r;
	393
	394	outputs something like this:
	395
	396	Benchmark: running a, b, each for at least 5 CPU seconds...
	397	a: 10 wallclock secs ( 5.14 usr + 0.13 sys = 5.27 CPU) @ 3835055.60/s (n=20210743)
	398	b: 5 wallclock secs ( 5.41 usr + 0.00 sys = 5.41 CPU) @ 1574944.92/s (n=8520452)
	399	Rate b a
	400	b 1574945/s -- -59%
	401	a 3835056/s 144% --
	402
	403
	404	=head1 INHERITANCE
	405
	406	Benchmark inherits from no other class, except of course
	407	for Exporter.
	408
	409	=head1 CAVEATS
	410
	411	Comparing eval'd strings with code references will give you
	412	inaccurate results: a code reference will show a slightly slower
	413	execution time than the equivalent eval'd string.
	414
	415	The real time timing is done using time(2) and
	416	the granularity is therefore only one second.
	417
	418	Short tests may produce negative figures because perl
	419	can appear to take longer to execute the empty loop
	420	than a short test; try:
	421
	422	timethis(100,'1');
	423
	424	The system time of the null loop might be slightly
	425	more than the system time of the loop with the actual
	426	code and therefore the difference might end up being E<lt> 0.
	427
	428	=head1 SEE ALSO
	429
	430	L<Devel::NYTProf> - a Perl code profiler
	431
	432	=head1 AUTHORS
	433
	434	Jarkko Hietaniemi <F<jhi@iki.fi>>, Tim Bunce <F<Tim.Bunce@ig.co.uk>>
	435
	436	=head1 MODIFICATION HISTORY
	437
	438	September 8th, 1994; by Tim Bunce.
	439
	440	March 28th, 1997; by Hugo van der Sanden: added support for code
	441	references and the already documented 'debug' method; revamped
	442	documentation.
	443
	444	April 04-07th, 1997: by Jarkko Hietaniemi, added the run-for-some-time
	445	functionality.
	446
	447	September, 1999; by Barrie Slaymaker: math fixes and accuracy and
	448	efficiency tweaks. Added cmpthese(). A result is now returned from
	449	timethese(). Exposed countit() (was runfor()).
	450
	451	December, 2001; by Nicholas Clark: make timestr() recognise the style 'none'
	452	and return an empty string. If cmpthese is calling timethese, make it pass the
	453	style in. (so that 'none' will suppress output). Make sub new dump its
	454	debugging output to STDERR, to be consistent with everything else.
	455	All bugs found while writing a regression test.
	456
	457	September, 2002; by Jarkko Hietaniemi: add ':hireswallclock' special tag.
	458
	459	February, 2004; by Chia-liang Kao: make cmpthese and timestr use time
	460	statistics for children instead of parent when the style is 'nop'.
	461
	462	November, 2007; by Christophe Grosjean: make cmpthese and timestr compute
	463	time consistently with style argument, default is 'all' not 'noc' any more.
	464
	465	=cut
	466
	# Evaluate a string of Perl with strictures switched off and hand back
	# whatever it yields.  This sub sits above every file-scoped lexical and
	# declares none of its own, so the evaluated text sees a clean lexical
	# environment.
	sub _doeval {
	    no strict;
	    return eval shift;
	}
	469
	470	#
	471	# put any lexicals at file scope AFTER here
	472	#
	473
	474	use Carp;
	475	use Exporter;
	476
	477	our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS, $VERSION);
	478
	479	@ISA=qw(Exporter);
	480	@EXPORT=qw(timeit timethis timethese timediff timestr);
	481	@EXPORT_OK=qw(timesum cmpthese countit
	482	clearcache clearallcache disablecache enablecache);
	483	%EXPORT_TAGS=( all => [ @EXPORT, @EXPORT_OK ] ) ;
	484
	485	$VERSION = 1.17;
	486
	487	# --- ':hireswallclock' special handling
	488
	489	my $hirestime;
	490
	# Wallclock source used by new().  The empty prototype lets it be written
	# as a bare word in a list; import() may rebind this glob to the
	# Time::HiRes version when ':hireswallclock' is requested.
	sub mytime () {
	    return time;
	}
	492
	493	init();
	494
	# Compile-time probe for Time::HiRes.  When the module loads, its time()
	# is imported into this package and a reference to it is remembered in
	# $hirestime so that import() can install it as mytime() on request.
	# When it does not load, $hirestime is simply left unset.
	BEGIN {
	    if (eval 'require Time::HiRes') {
	        Time::HiRes->import(qw(time));
	        $hirestime = \&Time::HiRes::time;
	    }
	}
	501
	# Custom import: strips the pseudo-tag ':hireswallclock' from the import
	# list (switching mytime() to the high-resolution clock if one was found)
	# and passes everything else to Exporter.
	sub import {
	    my ($class, @requested) = @_;

	    my @symbols = grep { $_ ne ":hireswallclock" } @requested;

	    # A shorter list means the tag was present at least once.
	    if (@symbols != @requested) {
	        # $^W is cleared for the glob assignment, presumably to keep the
	        # redefinition of mytime() from warning.
	        local $^W = 0;
	        *mytime = $hirestime if defined $hirestime;
	    }

	    Benchmark->export_to_level(1, $class, @symbols);
	}
	511
	512	our($Debug, $Min_Count, $Min_CPU, $Default_Format, $Default_Style,
	513	%_Usage, %Cache, $Do_Cache);
	514
	# Reset the module's tunables to their defaults and start with the
	# null-loop cache disabled and empty.
	sub init {
	    ($Debug, $Min_Count, $Min_CPU)     = (0, 4, 0.4);
	    ($Default_Format, $Default_Style)  = ('5.2f', 'auto');

	    # Caching costs a little sys-time accuracy.  It may pay off for many
	    # tests (>10) with very large counts (>10000), or on a very slow
	    # machine; otherwise it stays off.
	    disablecache();
	    clearallcache();
	}
	527
	# Class method: Benchmark->debug(FLAG).  Stores a boolean in $Debug that
	# is true exactly when FLAG is numerically non-zero, and returns it.
	sub debug {
	    my (undef, $flag) = @_;
	    return $Debug = ($flag != 0);
	}
	529
	# Return the usage text registered in %_Usage for the sub that called us,
	# or the empty string when none is registered.
	sub usage {
	    # Frame 1 is our caller; element 3 is its fully qualified sub name,
	    # which is reduced to the bare name used as the %_Usage key.
	    (my $caller_name = (caller(1))[3]) =~ s/^Benchmark:://;
	    return $_Usage{$caller_name} || '';
	}
	535
	536	# The cache needs two branches: 's' for strings and 'c' for code. The
	537	# empty loop is different in these two cases.
	538
	539	$_Usage{clearcache} = <<'USAGE';
	540	usage: clearcache($count);
	541	USAGE
	542
	# clearcache(COUNT): forget the cached null-loop timings for COUNT rounds.
	# Two entries exist per count, keyed "<COUNT>c" (code refs) and
	# "<COUNT>s" (strings); both are removed.  Dies with the usage text
	# unless exactly one argument is given.
	sub clearcache {
	    die usage() unless @_ == 1;
	    my $count = $_[0];
	    delete $Cache{ $count . 'c' };
	    return delete $Cache{ $count . 's' };
	}
	547
	548	$_Usage{clearallcache} = <<'USAGE';
	549	usage: clearallcache();
	550	USAGE
	551
	# clearallcache(): drop every cached null-loop timing.  Takes no
	# arguments; dies with the usage text if any are passed.
	sub clearallcache {
	    die usage() if @_;
	    return %Cache = ();
	}
	556
	557	$_Usage{enablecache} = <<'USAGE';
	558	usage: enablecache();
	559	USAGE
	560
	# enablecache(): turn on reuse of cached null-loop timings by setting
	# $Do_Cache.  Takes no arguments; dies with the usage text otherwise.
	sub enablecache {
	    die usage() if @_;
	    return $Do_Cache = 1;
	}
	565
	566	$_Usage{disablecache} = <<'USAGE';
	567	usage: disablecache();
	568	USAGE
	569
	# disablecache(): turn off reuse of cached null-loop timings by clearing
	# $Do_Cache.  Takes no arguments; dies with the usage text otherwise.
	sub disablecache {
	    die usage() if @_;
	    return $Do_Cache = 0;
	}
	574
	575
	576	# --- Functions to process the 'time' data type
	577
	# Constructor: snapshot the clocks.  The object is a blessed array of
	# (wallclock, the four values from times(), iteration count); the count
	# is the single optional argument after the invocant and defaults to 0.
	sub new {
	    my $wallclock  = mytime();
	    my @cpu_times  = times;
	    my $iterations = @_ == 2 ? $_[1] : 0;

	    my @t = ($wallclock, @cpu_times, $iterations);
	    print STDERR "new=@t\n" if $Debug;
	    return bless \@t;
	}
	581
	# CPU time of the parent process: slot 1 (user) plus slot 2 (system).
	sub cpu_p {
	    my ($self) = @_;
	    return $self->[1] + $self->[2];
	}
	# CPU time of child processes: slot 3 (user) plus slot 4 (system).
	sub cpu_c {
	    my ($self) = @_;
	    return $self->[3] + $self->[4];
	}
	# Total CPU time: parent user + parent system + child user + child
	# system (slots 1 to 4, summed in that order).
	sub cpu_a {
	    my ($self) = @_;
	    return $self->[1] + $self->[2] + $self->[3] + $self->[4];
	}
	# Elapsed wallclock time (slot 0).
	sub real {
	    my ($self) = @_;
	    return $self->[0];
	}
	# Number of iterations recorded in the object (slot 5).
	sub iters {
	    my ($self) = @_;
	    return $self->[5];
	}
	587
	588
	589	$_Usage{timediff} = <<'USAGE';
	590	usage: $result_diff = timediff($result1, $result2);
	591	USAGE
	592
	# timediff(T1, T2): element-wise T1 - T2, returned as a new blessed
	# object with as many slots as T1.  Dies with the usage text unless both
	# arguments are references.  Negative components are allowed through.
	sub timediff {
	    my ($later, $earlier) = @_;

	    die usage() unless ref $later and ref $earlier;

	    my @delta = map { $later->[$_] - $earlier->[$_] } 0 .. $#{$later};
	    return bless \@delta;
	}
	606
	607	$_Usage{timesum} = <<'USAGE';
	608	usage: $sum = timesum($result1, $result2);
	609	USAGE
	610
	# timesum(T1, T2): element-wise T1 + T2, returned as a new blessed object
	# with as many slots as T1.  Dies with the usage text unless both
	# arguments are references.
	sub timesum {
	    my ($first, $second) = @_;

	    die usage() unless ref $first and ref $second;

	    my @total = map { $first->[$_] + $second->[$_] } 0 .. $#{$first};
	    return bless \@total;
	}
	622
	623
	624	$_Usage{timestr} = <<'USAGE';
	625	usage: $formatted_result = timestr($result1);
	626	USAGE
	627
	# timestr(TIMEDIFF, [STYLE, [FORMAT]]): render a Benchmark object as text.
	#
	#   STYLE   'all', 'noc', 'nop', 'auto' or 'none'; defaults to
	#           $Default_Style.  'auto' becomes 'all' when the children CPU
	#           time is positive and 'noc' otherwise.  'none' yields ''.
	#           Any other value yields the raw slots followed by the style.
	#   FORMAT  printf conversion without the leading '%'; defaults to
	#           $Default_Format.
	#
	# When both the iteration count and the CPU time relevant to the style
	# are non-zero, a " @ rate/s (n=count)" suffix is appended.
	sub timestr {
	    my ($tr, $style, $f) = @_;

	    die usage() unless ref $tr;

	    my @t = @$tr;
	    warn "bad time value (@t)" unless @t==6;
	    my ($r, $pu, $ps, $cu, $cs, $n) = @t;
	    my ($pt, $ct, $tt) = ($tr->cpu_p, $tr->cpu_c, $tr->cpu_a);

	    $f = $Default_Format unless defined $f;
	    $style ||= $Default_Style;
	    return '' if $style eq 'none';
	    if ($style eq 'auto') {
	        $style = ($ct > 0) ? 'all' : 'noc';
	    }

	    # Wallclock is printed with %g when the hi-res clock is available,
	    # and as an integer otherwise.
	    my $w = $hirestime ? "%2g" : "%2d";

	    my ($s, $elapsed);
	    if ($style eq 'all') {
	        $s = sprintf("$w wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
	                     $r, $pu, $ps, $cu, $cs, $tt);
	        $elapsed = $cu+$cs+$pu+$ps;
	    }
	    elsif ($style eq 'noc') {
	        $s = sprintf("$w wallclock secs (%$f usr + %$f sys = %$f CPU)",
	                     $r, $pu, $ps, $pt);
	        $elapsed = $pu+$ps;
	    }
	    elsif ($style eq 'nop') {
	        $s = sprintf("$w wallclock secs (%$f cusr + %$f csys = %$f CPU)",
	                     $r, $cu, $cs, $ct);
	        $elapsed = $cu+$cs;
	    }
	    else {
	        $s = "@t $style";               # fallback for an unknown style
	        $elapsed = $cu+$cs+$pu+$ps;
	    }

	    $s .= sprintf(" @ %$f/s (n=$n)", $n/($elapsed)) if $n && $elapsed;
	    return $s;
	}
	658
	# timedebug(MSG, T): when $Debug is set, write MSG followed by the
	# formatted time T to STDERR.  Does nothing (and does not format T)
	# otherwise.
	sub timedebug {
	    my ($label, $time) = @_;
	    if ($Debug) {
	        print STDERR $label, timestr($time), "\n";
	    }
	}
	663
	664	# --- Functions implementing low-level support for timing loops
	665
	666	$_Usage{runloop} = <<'USAGE';
	667	usage: runloop($number, [$string \| $coderef])
	668	USAGE
	669
	# runloop(N, CODE): run CODE (a string or a code ref) N times inside a
	# generated loop and return the elapsed times as a Benchmark object whose
	# iteration slot is N.  Croaks on a negative N or when the generated loop
	# does not compile; confesses with the usage text when CODE is undefined.
	sub runloop {
	    my ($n, $c) = @_;

	    $n += 0;    # force numeric now, so garbage won't creep into the eval
	    croak "negative loopcount $n" if $n < 0;
	    confess usage() unless defined $c;

	    # Walk up the call stack to the first frame outside our own package:
	    # that is the package the benchmarked code is compiled into.
	    my ($curpack) = caller(0);
	    my $pack;
	    for (my $depth = 1; ($pack) = caller($depth); ++$depth) {
	        last if $pack ne $curpack;
	    }

	    # A code ref is called through the lexical $c, so its wrapper must be
	    # eval'd right here where $c is visible.  A string is compiled via
	    # _doeval() so that it cannot see any of our lexicals.
	    my ($subcode, $subref);
	    if (ref $c eq 'CODE') {
	        $subcode = "sub { for (1 .. $n) { local \$_; package $pack; &\$c; } }";
	        $subref  = eval($subcode);
	    }
	    else {
	        $subcode = "sub { for (1 .. $n) { local \$_; package $pack; $c;} }";
	        $subref  = _doeval($subcode);
	    }
	    croak "runloop unable to compile '$c': $@\ncode: $subcode\n" if $@;
	    print STDERR "runloop $n '$subcode'\n" if $Debug;

	    # Spin until the user-time clock ticks, so that timing starts on a
	    # tick boundary.  This narrows the error range from roughly
	    # (-0.01, +0.01) to (-0.01, +0), which makes a too-low first guess in
	    # countit() less likely and so saves extra runloop() calls.
	    my $tbase = Benchmark->new(0)->[1];
	    my $t0;
	    do { $t0 = Benchmark->new(0) } while $t0->[1] == $tbase;

	    $subref->();

	    my $t1 = Benchmark->new($n);
	    my $td = timediff($t1, $t0);
	    timedebug("runloop:", $td);
	    return $td;
	}
	711
	712	$_Usage{timeit} = <<'USAGE';
	713	usage: $result = timeit($count, 'code' ); or
	714	$result = timeit($count, sub { code } );
	715	USAGE
	716
	# timeit(COUNT, CODE): time COUNT iterations of CODE (a string or a code
	# ref) and return a Benchmark object holding the time of the real loop
	# minus the time of an equally long empty loop.
	#
	# Dies with the usage text unless CODE is defined and is either a plain
	# scalar or a CODE reference.
	sub timeit {
	    my ($n, $code) = @_;

	    die usage() unless defined $code and
	                       (!ref $code or ref $code eq 'CODE');

	    # Use print, not printf: $code is arbitrary user text and must not be
	    # interpreted as a format string (a '%' in the benchmarked code would
	    # otherwise be treated as a conversion).
	    print STDERR "timeit $n $code\n" if $Debug;

	    # Null-loop timings are kept per count and per flavour: 'c' for code
	    # refs, 's' for strings, since the empty loop differs between them.
	    my $cache_key = $n . ( ref( $code ) ? 'c' : 's' );

	    my $wn;
	    if ($Do_Cache && exists $Cache{$cache_key}) {
	        $wn = $Cache{$cache_key};
	    }
	    else {
	        $wn = runloop($n, ref( $code ) ? sub { } : '');
	        # Can't let our baseline have any iterations, or they get
	        # subtracted out of the result.
	        $wn->[5] = 0;
	        $Cache{$cache_key} = $wn;
	    }

	    my $wc = runloop($n, $code);
	    my $wd = timediff($wc, $wn);

	    timedebug("timeit: ", $wc);
	    timedebug(" - ", $wn);
	    timedebug(" = ", $wd);

	    return $wd;
	}
	745
	746
	747	my $default_for = 3;
	748	my $min_for = 0.1;
	749
	750
	751	$_Usage{countit} = <<'USAGE';
	752	usage: $result = countit($time, 'code' ); or
	753	$result = countit($time, sub { code } );
	754	USAGE
	755
	# countit(TIME, CODE): run CODE repeatedly until at least TIME CPU seconds
	# (user + system of this process) have been spent, and return a Benchmark
	# object with the accumulated times and the total iteration count.
	#
	# An undefined or zero TIME means $default_for; a negative TIME is
	# negated.  Dies if the resulting limit is below $min_for, if called
	# without arguments, or if timings stay at zero for too many rounds.
	sub countit {
	    my ( $tmax, $code ) = @_;

	    die usage() unless @_;

	    $tmax = ( !defined $tmax || $tmax == 0 ) ? $default_for
	          : $tmax < 0                        ? -$tmax
	          :                                    $tmax;

	    die "countit($tmax, ...): timelimit cannot be less than $min_for.\n"
	        if $tmax < $min_for;

	    # Phase 1: double $n until one run takes a measurable amount of CPU.
	    my $n     = 1;
	    my $zeros = 0;
	    my $tc;
	    while (1) {
	        my $td = timeit($n, $code);
	        $tc = $td->[1] + $td->[2];
	        if ( $tc <= 0 and $n > 1024 ) {
	            die "Timing is consistently zero in estimation loop, cannot benchmark. N=$n\n"
	                if ++$zeros > 16;
	        }
	        else {
	            $zeros = 0;
	        }
	        last if $tc > 0.1;
	        $n *= 2;
	    }

	    my $nmin = $n;

	    # Phase 2: raise $n until a run lasts about a tenth of the target, so
	    # the final count can be extrapolated with some accuracy.
	    my $tpra = 0.1 * $tmax;    # Target/time practice.
	    while ( $tc < $tpra ) {
	        # Overshoot by 5% so we rarely need another round; these runs are
	        # not counted, so accuracy is not noticeably affected.
	        $n = int( $tpra * 1.05 * $n / $tc );    # Linear approximation.
	        my $td = timeit($n, $code);
	        my $measured = $td->[1] + $td->[2];
	        # Make sure we are making progress.
	        my $floor = 1.2 * $tc;
	        $tc = $measured > $floor ? $measured : $floor;
	    }

	    # Phase 3: the timings that count, repeated until the limit is met.
	    # @sum holds real, user, system, child-user and child-system totals.
	    my @sum  = (0, 0.0, 0.0, 0.0, 0.0);
	    my $ntot = 0;
	    my $ttot = 0.0;

	    # Again 5% extra: $n tends to come out a few percent low, and fewer
	    # timeit() calls mean better accuracy.
	    $n = int( $n * ( 1.05 * $tmax / $tc ) );
	    $zeros = 0;
	    while (1) {
	        my $td = timeit($n, $code);
	        $ntot += $n;
	        $sum[$_] += $td->[$_] for 0 .. 4;
	        $ttot = $sum[1] + $sum[2];
	        last if $ttot >= $tmax;

	        if ( $ttot <= 0 ) {
	            die "Timing is consistently zero, cannot benchmark. N=$n\n"
	                if ++$zeros > 16;
	        }
	        else {
	            $zeros = 0;
	        }

	        $ttot = 0.01 if $ttot < 0.01;
	        my $r = $tmax / $ttot - 1;    # Linear approximation.
	        $n = int( $r * $ntot );
	        $n = $nmin if $n < $nmin;
	    }

	    return bless [ @sum, $ntot ];
	}
	841
	842	# --- Functions implementing high-level time-then-print utilities
	843
	# Translate a COUNT argument into a "run for this many CPU seconds"
	# value: zero selects $default_for, a negative count gives its absolute
	# value, and a positive count (a plain iteration count) gives undef.
	sub n_to_for {
	    my ($n) = @_;
	    return $default_for if $n == 0;
	    return -$n          if $n < 0;
	    return undef;
	}
	848
	849	$_Usage{timethis} = <<'USAGE';
	850	usage: $result = timethis($time, 'code' ); or
	851	$result = timethis($time, sub { code } );
	852	USAGE
	853
	# timethis(COUNT, CODE, [TITLE, [STYLE]]): time CODE and print the result
	# to STDOUT as "TITLE: times", unless STYLE is 'none'.
	#
	# A positive COUNT must be an integer and is the number of iterations.
	# Otherwise COUNT is converted by n_to_for() into a CPU-time limit and
	# the code is run through countit().  Returns the Benchmark object.
	sub timethis {
	    my ($n, $code, $title, $style) = @_;

	    die usage() unless defined $code and
	                       (!ref $code or ref $code eq 'CODE');

	    my ($t, $forn);
	    if ( $n > 0 ) {
	        croak "non-integer loopcount $n, stopped" if int($n)<$n;
	        $t = timeit($n, $code);
	        $title = "timethis $n" unless defined $title;
	    }
	    else {
	        my $fort = n_to_for( $n );
	        $t = countit( $fort, $code );
	        $title = "timethis for $fort" unless defined $title;
	        $forn = $t->[-1];    # iterations actually performed
	    }

	    local $| = 1;
	    $style = "" unless defined $style;
	    unless ($style eq 'none') {
	        printf("%10s: ", $title);
	        print timestr($t, $style, $Default_Format), "\n";
	    }

	    # In timed mode, judge reliability by the iterations really run.
	    $n = $forn if defined $forn;

	    # A conservative warning to spot very silly tests.  Not getting this
	    # warning does not prove that a benchmark is sound!
	    my $unreliable = $n < $Min_Count
	                  || ($t->real < 1 && $n < 1000)
	                  || $t->cpu_a < $Min_CPU;
	    print " (warning: too few iterations for a reliable count)\n"
	        if $unreliable;

	    return $t;
	}
	887
	888
	889	$_Usage{timethese} = <<'USAGE';
	890	usage: timethese($count, { Name1 => 'code1', ... }); or
	891	timethese($count, { Name1 => sub { code1 }, ... });
	892	USAGE
	893
	# timethese(COUNT, CODEHASHREF, [STYLE]): run timethis() on every entry
	# of CODEHASHREF, in string-sorted key order, and return a hash reference
	# mapping each name to its Benchmark object.
	#
	# Unless STYLE is 'none', a banner is printed first, e.g.
	#   Benchmark: timing 100 iterations of a, b...
	#   Benchmark: running a, b, each for at least 3 CPU seconds...
	# Dies with the usage text unless CODEHASHREF is a hash reference, and
	# croaks on a positive non-integer COUNT.
	sub timethese {
	    my ($n, $alt, $style) = @_;
	    die usage() unless ref $alt eq 'HASH';

	    my @names = sort keys %$alt;
	    $style = "" unless defined $style;
	    my $quiet = $style eq 'none';

	    print "Benchmark: " unless $quiet;
	    if ( $n > 0 ) {
	        croak "non-integer loopcount $n, stopped" if int($n)<$n;
	        print "timing $n iterations of" unless $quiet;
	    }
	    else {
	        print "running" unless $quiet;
	    }
	    print " ", join(', ', @names) unless $quiet;
	    unless ( $n > 0 ) {
	        my $for = n_to_for( $n );
	        # ", each" only makes sense with several tests.  This used to
	        # test $n > 1, which can never hold in this branch, so the word
	        # was never printed.
	        print ", each" if @names > 1 && !$quiet;
	        print " for at least $for CPU seconds" unless $quiet;
	    }
	    print "...\n" unless $quiet;

	    # we could save the results in an array and produce a summary here
	    # sum, min, max, avg etc etc
	    my %results;
	    for my $name (@names) {
	        $results{$name} = timethis($n, $alt->{$name}, $name, $style);
	    }

	    return \%results;
	}
	924
	925
	926	$_Usage{cmpthese} = <<'USAGE';
	927	usage: cmpthese($count, { Name1 => 'code1', ... }); or
	928	cmpthese($count, { Name1 => sub { code1 }, ... }); or
	929	cmpthese($result, $style);
	930	USAGE
	931
	# cmpthese(COUNT, CODEHASHREF, [STYLE])
	# cmpthese(RESULTSHASHREF, [STYLE])
	#
	# Build, and unless STYLE is 'none' print, a comparison chart of
	# benchmark results, sorted from slowest to fastest.  When the first
	# argument is an unblessed hash reference it is taken as the result
	# structure of timethese(); otherwise timethese() is called first (with
	# style 'none' when no STYLE is given).
	#
	# Returns a reference to an array of rows: a header row
	# ('', 'Rate' or 's/iter', names...) followed by one row per test
	# (name, formatted rate, percentage difference against each column).
	#
	# Every result is assumed to have a non-zero rate; a zero rate leads to a
	# division by zero.  An empty result set yields just the header row.
	sub cmpthese {
	    my ($results, $style);

	    # $count can be a blessed object.
	    if ( ref $_[0] eq 'HASH' ) {
	        ($results, $style) = @_;
	    }
	    else {
	        my ($count, $code) = @_[0, 1];
	        $style = $_[2] if defined $_[2];

	        die usage() unless ref $code eq 'HASH';

	        $results = timethese($count, $code, ($style || "none"));
	    }

	    $style = "" unless defined $style;

	    # Flatten into an array of arrays with the name as the first field:
	    # [ name, real, user, sys, cuser, csys, iters ].
	    my @vals = map { [ $_, @{ $results->{$_} } ] } keys %$results;

	    for (@vals) {
	        # CPU time relevant to the requested style.
	        my $elapsed = do {
	            if    ($style eq 'nop') { $_->[4]+$_->[5] }
	            elsif ($style eq 'noc') { $_->[2]+$_->[3] }
	            else                    { $_->[2]+$_->[3]+$_->[4]+$_->[5] }
	        };
	        # The epsilon fudge here is to prevent div by 0.  Since clock
	        # resolutions are much larger, it's below the noise floor.
	        my $rate = $_->[6]/(($elapsed)+0.000000000000001);
	        $_->[7] = $rate;
	    }

	    # Sort by rate
	    @vals = sort { $a->[7] <=> $b->[7] } @vals;

	    # If more than half of the rates are greater than one...
	    my $display_as_rate = @vals ? ($vals[$#vals>>1]->[7] > 1) : 0;

	    my @rows;
	    my @col_widths;

	    my @top_row = (
	        '',
	        $display_as_rate ? 'Rate' : 's/iter',
	        map { $_->[0] } @vals
	    );

	    push @rows, \@top_row;
	    @col_widths = map { length( $_ ) } @top_row;

	    # Build the data rows
	    # We leave the last column in even though it never has any data.
	    # Perhaps it should go away.  Also, perhaps a style for a single
	    # column of percentages might be nice.
	    for my $row_val ( @vals ) {
	        my @row;

	        # Column 0 = test name
	        push @row, $row_val->[0];
	        $col_widths[0] = length( $row_val->[0] )
	            if length( $row_val->[0] ) > $col_widths[0];

	        # Column 1 = performance
	        my $row_rate = $row_val->[7];

	        # We assume that we'll never get a 0 rate.
	        my $rate = $display_as_rate ? $row_rate : 1 / $row_rate;

	        # Only give a few decimal places before switching to sci.
	        # notation, since the results aren't usually that accurate anyway.
	        my $format =
	            $rate >= 100 ? "%0.0f" :
	            $rate >= 10  ? "%0.1f" :
	            $rate >= 1   ? "%0.2f" :
	            $rate >= 0.1 ? "%0.3f" :
	                           "%0.2e";

	        $format .= "/s"
	            if $display_as_rate;

	        my $formatted_rate = sprintf( $format, $rate );
	        push @row, $formatted_rate;
	        $col_widths[1] = length( $formatted_rate )
	            if length( $formatted_rate ) > $col_widths[1];

	        # Columns 2..N = performance ratios
	        my $skip_rest = 0;
	        for ( my $col_num = 0 ; $col_num < @vals ; ++$col_num ) {
	            my $col_val = $vals[$col_num];
	            my $out;
	            if ( $skip_rest ) {
	                $out = '';
	            }
	            elsif ( $col_val->[0] eq $row_val->[0] ) {
	                $out = "--";
	                # $skip_rest = 1;
	            }
	            else {
	                my $col_rate = $col_val->[7];
	                $out = sprintf( "%.0f%%", 100*$row_rate/$col_rate - 100 );
	            }
	            push @row, $out;
	            $col_widths[$col_num+2] = length( $out )
	                if length( $out ) > $col_widths[$col_num+2];

	            # A little weirdness to set the first column width properly
	            $col_widths[$col_num+2] = length( $col_val->[0] )
	                if length( $col_val->[0] ) > $col_widths[$col_num+2];
	        }
	        push @rows, \@row;
	    }

	    return \@rows if $style eq "none";

	    # Equalize column widths in the chart as much as possible without
	    # exceeding 80 characters.  This does not use or affect cols 0 or 1.
	    # The references point at the live elements of @col_widths.
	    my @sorted_width_refs =
	        sort { $$a <=> $$b } map { \$_ } @col_widths[2..$#col_widths];

	    # With no results there are no ratio columns; skip the stretching
	    # rather than dereference an undefined entry.
	    if ( @sorted_width_refs ) {
	        my $max_width = ${$sorted_width_refs[-1]};

	        my $total = @col_widths - 1 ;
	        for ( @col_widths ) { $total += $_ }

	        STRETCHER:
	        while ( $total < 80 ) {
	            my $min_width = ${$sorted_width_refs[0]};
	            last
	                if $min_width == $max_width;
	            for ( @sorted_width_refs ) {
	                last
	                    if $$_ > $min_width;
	                ++$$_;
	                ++$total;
	                last STRETCHER
	                    if $total >= 80;
	            }
	        }
	    }

	    # Dump the output; the first column is left-justified.
	    my $format = join( ' ', map { "%${_}s" } @col_widths ) . "\n";
	    substr( $format, 1, 0 ) = '-';
	    for ( @rows ) {
	        printf $format, @$_;
	    }

	    return \@rows ;
	}
	1084
	1085
	1086	1;