perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	package Benchmark;
	2
	3	use strict;
	4
	5
	6	=head1 NAME
	7
	8	Benchmark - benchmark running times of Perl code
	9
	10	=head1 SYNOPSIS
	11
	12	use Benchmark qw(:all) ;
	13
	14	timethis ($count, "code");
	15
	16	# Use Perl code in strings...
	17	timethese($count, {
	18	'Name1' => '...code1...',
	19	'Name2' => '...code2...',
	20	});
	21
	22	# ... or use subroutine references.
	23	timethese($count, {
	24	'Name1' => sub { ...code1... },
	25	'Name2' => sub { ...code2... },
	26	});
	27
	28	# cmpthese can be used both ways as well
	29	cmpthese($count, {
	30	'Name1' => '...code1...',
	31	'Name2' => '...code2...',
	32	});
	33
	34	cmpthese($count, {
	35	'Name1' => sub { ...code1... },
	36	'Name2' => sub { ...code2... },
	37	});
	38
	39	# ...or in two stages
	40	$results = timethese($count,
	41	{
	42	'Name1' => sub { ...code1... },
	43	'Name2' => sub { ...code2... },
	44	},
	45	'none'
	46	);
	47	cmpthese( $results ) ;
	48
	49	$t = timeit($count, '...other code...')
	50	print "$count loops of other code took:",timestr($t),"\n";
	51
	52	$t = countit($time, '...other code...')
	53	$count = $t->iters ;
	54	print "$count loops of other code took:",timestr($t),"\n";
	55
	56	# enable hires wallclock timing if possible
	57	use Benchmark ':hireswallclock';
	58
	59	=head1 DESCRIPTION
	60
	61	The Benchmark module encapsulates a number of routines to help you
	62	figure out how long it takes to execute some code.
	63
	64	timethis - run a chunk of code several times
	65
	66	timethese - run several chunks of code several times
	67
	68	cmpthese - print results of timethese as a comparison chart
	69
	70	timeit - run a chunk of code and see how long it goes
	71
	72	countit - see how many times a chunk of code runs in a given time
	73
	74
	75	=head2 Methods
	76
	77	=over 10
	78
	79	=item new
	80
	81	Returns the current time. Example:
	82
	83	use Benchmark;
	84	$t0 = Benchmark->new;
	85	# ... your code here ...
	86	$t1 = Benchmark->new;
	87	$td = timediff($t1, $t0);
	88	print "the code took:",timestr($td),"\n";
	89
	90	=item debug
	91
	92	Enables or disable debugging by setting the C<$Benchmark::Debug> flag:
	93
	94	Benchmark->debug(1);
	95	$t = timeit(10, ' 5 ** $Global ');
	96	Benchmark->debug(0);
	97
	98	=item iters
	99
	100	Returns the number of iterations.
	101
	102	=back
	103
	104	=head2 Standard Exports
	105
	106	The following routines will be exported into your namespace
	107	if you use the Benchmark module:
	108
	109	=over 10
	110
	111	=item timeit(COUNT, CODE)
	112
	113	Arguments: COUNT is the number of times to run the loop, and CODE is
	114	the code to run. CODE may be either a code reference or a string to
	115	be eval'd; either way it will be run in the caller's package.
	116
	117	Returns: a Benchmark object.
	118
	119	=item timethis ( COUNT, CODE, [ TITLE, [ STYLE ]] )
	120
	121	Time COUNT iterations of CODE. CODE may be a string to eval or a
	122	code reference; either way the CODE will run in the caller's package.
	123	Results will be printed to STDOUT as TITLE followed by the times.
	124	TITLE defaults to "timethis COUNT" if none is provided. STYLE
	125	determines the format of the output, as described for timestr() below.
	126
	127	The COUNT can be zero or negative: this means the I<minimum number of
	128	CPU seconds> to run. A zero signifies the default of 3 seconds. For
	129	example to run at least for 10 seconds:
	130
	131	timethis(-10, $code)
	132
	133	or to run two pieces of code tests for at least 3 seconds:
	134
	135	timethese(0, { test1 => '...', test2 => '...'})
	136
	137	CPU seconds is, in UNIX terms, the user time plus the system time of
	138	the process itself, as opposed to the real (wallclock) time and the
	139	time spent by the child processes. Less than 0.1 seconds is not
	140	accepted (-0.01 as the count, for example, will cause a fatal runtime
	141	exception).
	142
	143	Note that the CPU seconds is the B<minimum> time: CPU scheduling and
	144	other operating system factors may complicate the attempt so that a
	145	little bit more time is spent. The benchmark output will, however,
	146	also tell the number of C<$code> runs/second, which should be a more
	147	interesting number than the actually spent seconds.
	148
	149	Returns a Benchmark object.
	150
	151	=item timethese ( COUNT, CODEHASHREF, [ STYLE ] )
	152
	153	The CODEHASHREF is a reference to a hash containing names as keys
	154	and either a string to eval or a code reference for each value.
	155	For each (KEY, VALUE) pair in the CODEHASHREF, this routine will
	156	call
	157
	158	timethis(COUNT, VALUE, KEY, STYLE)
	159
	160	The routines are called in string comparison order of KEY.
	161
	162	The COUNT can be zero or negative, see timethis().
	163
	164	Returns a hash reference of Benchmark objects, keyed by name.
	165
	166	=item timediff ( T1, T2 )
	167
	168	Returns the difference between two Benchmark times as a Benchmark
	169	object suitable for passing to timestr().
	170
	171	=item timestr ( TIMEDIFF, [ STYLE, [ FORMAT ] ] )
	172
	173	Returns a string that formats the times in the TIMEDIFF object in
	174	the requested STYLE. TIMEDIFF is expected to be a Benchmark object
	175	similar to that returned by timediff().
	176
	177	STYLE can be any of 'all', 'none', 'noc', 'nop' or 'auto'. 'all' shows
	178	each of the 5 times available ('wallclock' time, user time, system time,
	179	user time of children, and system time of children). 'noc' shows all
	180	except the two children times. 'nop' shows only wallclock and the
	181	two children times. 'auto' (the default) will act as 'all' unless
	182	the children times are both zero, in which case it acts as 'noc'.
	183	'none' prevents output.
	184
	185	FORMAT is the L<printf(3)>-style format specifier (without the
	186	leading '%') to use to print the times. It defaults to '5.2f'.
	187
	188	=back
	189
	190	=head2 Optional Exports
	191
	192	The following routines will be exported into your namespace
	193	if you specifically ask that they be imported:
	194
	195	=over 10
	196
	197	=item clearcache ( COUNT )
	198
	199	Clear the cached time for COUNT rounds of the null loop.
	200
	201	=item clearallcache ( )
	202
	203	Clear all cached times.
	204
	205	=item cmpthese ( COUNT, CODEHASHREF, [ STYLE ] )
	206
	207	=item cmpthese ( RESULTSHASHREF, [ STYLE ] )
	208
	209	Optionally calls timethese(), then outputs comparison chart. This:
	210
	211	cmpthese( -1, { a => "++\$i", b => "\$i *= 2" } ) ;
	212
	213	outputs a chart like:
	214
	215	Rate b a
	216	b 2831802/s -- -61%
	217	a 7208959/s 155% --
	218
	219	This chart is sorted from slowest to fastest, and shows the percent speed
	220	difference between each pair of tests.
	221
	222	C<cmpthese> can also be passed the data structure that timethese() returns:
	223
	224	$results = timethese( -1,
	225	{ a => "++\$i", b => "\$i *= 2" } ) ;
	226	cmpthese( $results );
	227
	228	in case you want to see both sets of results.
	229	If the first argument is an unblessed hash reference,
	230	that is RESULTSHASHREF; otherwise that is COUNT.
	231
	232	Returns a reference to an ARRAY of rows, each row is an ARRAY of cells from the
	233	above chart, including labels. This:
	234
	235	my $rows = cmpthese( -1,
	236	{ a => '++$i', b => '$i *= 2' }, "none" );
	237
	238	returns a data structure like:
	239
	240	[
	241	[ '', 'Rate', 'b', 'a' ],
	242	[ 'b', '2885232/s', '--', '-59%' ],
	243	[ 'a', '7099126/s', '146%', '--' ],
	244	]
	245
	246	B<NOTE>: This result value differs from previous versions, which returned
	247	the C<timethese()> result structure. If you want that, just use the two
	248	statement C<timethese>...C<cmpthese> idiom shown above.
	249
	250	Incidentally, note the variance in the result values between the two examples;
	251	this is typical of benchmarking. If this were a real benchmark, you would
	252	probably want to run a lot more iterations.
	253
	254	=item countit(TIME, CODE)
	255
	256	Arguments: TIME is the minimum length of time to run CODE for, and CODE is
	257	the code to run. CODE may be either a code reference or a string to
	258	be eval'd; either way it will be run in the caller's package.
	259
	260	TIME is I<not> negative. countit() will run the loop many times to
	261	calculate the speed of CODE before running it for TIME. The actual
	262	time run for will usually be greater than TIME due to system clock
	263	resolution, so it's best to look at the number of iterations divided
	264	by the times that you are concerned with, not just the iterations.
	265
	266	Returns: a Benchmark object.
	267
	268	=item disablecache ( )
	269
	270	Disable caching of timings for the null loop. This will force Benchmark
	271	to recalculate these timings for each new piece of code timed.
	272
	273	=item enablecache ( )
	274
	275	Enable caching of timings for the null loop. The time taken for COUNT
	276	rounds of the null loop will be calculated only once for each
	277	different COUNT used.
	278
	279	=item timesum ( T1, T2 )
	280
	281	Returns the sum of two Benchmark times as a Benchmark object suitable
	282	for passing to timestr().
	283
	284	=back
	285
	286	=head2 :hireswallclock
	287
	288	If the Time::HiRes module has been installed, you can specify the
	289	special tag C<:hireswallclock> for Benchmark (if Time::HiRes is not
	290	available, the tag will be silently ignored). This tag will cause the
	291	wallclock time to be measured in microseconds, instead of integer
	292	seconds. Note though that the speed computations are still conducted
	293	in CPU time, not wallclock time.
	294
	295	=head1 Benchmark Object
	296
	297	Many of the functions in this module return a Benchmark object,
	298	or in the case of C<timethese()>, a reference to a hash, the values of
	299	which are Benchmark objects. This is useful if you want to store or
	300	further process results from Benchmark functions.
	301
	302	Internally the Benchmark object holds timing values,
	303	described in L</"NOTES"> below.
	304	The following methods can be used to access them:
	305
	306	=over 4
	307
	308	=item cpu_p
	309
	310	Total CPU (User + System) of the main (parent) process.
	311
	312	=item cpu_c
	313
	314	Total CPU (User + System) of any children processes.
	315
	316	=item cpu_a
	317
	318	Total CPU of parent and any children processes.
	319
	320	=item real
	321
	322	Real elapsed time "wallclock seconds".
	323
	324	=item iters
	325
	326	Number of iterations run.
	327
	328	=back
	329
	330	The following illustrates use of the Benchmark object:
	331
	332	$result = timethis(100000, sub { ... });
	333	print "total CPU = ", $result->cpu_a, "\n";
	334
	335	=head1 NOTES
	336
	337	The data is stored as a list of values from the time and times
	338	functions:
	339
	340	($real, $user, $system, $children_user, $children_system, $iters)
	341
	342	in seconds for the whole loop (not divided by the number of rounds).
	343
	344	The timing is done using time(3) and times(3).
	345
	346	Code is executed in the caller's package.
	347
	348	The time of the null loop (a loop with the same
	349	number of rounds but empty loop body) is subtracted
	350	from the time of the real loop.
	351
	352	The null loop times can be cached, the key being the
	353	number of rounds. The caching can be controlled using
	354	calls like these:
	355
	356	clearcache($key);
	357	clearallcache();
	358
	359	disablecache();
	360	enablecache();
	361
	362	Caching is off by default, as it can (usually slightly) decrease
	363	accuracy and does not usually noticeably affect runtimes.
	364
	365	=head1 EXAMPLES
	366
	367	For example,
	368
	369	use Benchmark qw( cmpthese ) ;
	370	$x = 3;
	371	cmpthese( -5, {
	372	a => sub{$x*$x},
	373	b => sub{$x**2},
	374	} );
	375
	376	outputs something like this:
	377
	378	Benchmark: running a, b, each for at least 5 CPU seconds...
	379	Rate b a
	380	b 1559428/s -- -62%
	381	a 4152037/s 166% --
	382
	383
	384	while
	385
	386	use Benchmark qw( timethese cmpthese ) ;
	387	$x = 3;
	388	$r = timethese( -5, {
	389	a => sub{$x*$x},
	390	b => sub{$x**2},
	391	} );
	392	cmpthese $r;
	393
	394	outputs something like this:
	395
	396	Benchmark: running a, b, each for at least 5 CPU seconds...
	397	a: 10 wallclock secs ( 5.14 usr + 0.13 sys = 5.27 CPU) @ 3835055.60/s (n=20210743)
	398	b: 5 wallclock secs ( 5.41 usr + 0.00 sys = 5.41 CPU) @ 1574944.92/s (n=8520452)
	399	Rate b a
	400	b 1574945/s -- -59%
	401	a 3835056/s 144% --
	402
	403
	404	=head1 INHERITANCE
	405
	406	Benchmark inherits from no other class, except of course
	407	for Exporter.
	408
	409	=head1 CAVEATS
	410
	411	Comparing eval'd strings with code references will give you
	412	inaccurate results: a code reference will show a slightly slower
	413	execution time than the equivalent eval'd string.
	414
	415	The real time timing is done using time(2) and
	416	the granularity is therefore only one second.
	417
	418	Short tests may produce negative figures because perl
	419	can appear to take longer to execute the empty loop
	420	than a short test; try:
	421
	422	timethis(100,'1');
	423
	424	The system time of the null loop might be slightly
	425	more than the system time of the loop with the actual
	426	code and therefore the difference might end up being E<lt> 0.
	427
	428	=head1 SEE ALSO
	429
	430	L<Devel::NYTProf> - a Perl code profiler
	431
	432	=head1 AUTHORS
	433
	434	Jarkko Hietaniemi <F<jhi@iki.fi>>, Tim Bunce <F<Tim.Bunce@ig.co.uk>>
	435
	436	=head1 MODIFICATION HISTORY
	437
	438	September 8th, 1994; by Tim Bunce.
	439
	440	March 28th, 1997; by Hugo van der Sanden: added support for code
	441	references and the already documented 'debug' method; revamped
	442	documentation.
	443
	444	April 04-07th, 1997: by Jarkko Hietaniemi, added the run-for-some-time
	445	functionality.
	446
	447	September, 1999; by Barrie Slaymaker: math fixes and accuracy and
	448	efficiency tweaks. Added cmpthese(). A result is now returned from
	449	timethese(). Exposed countit() (was runfor()).
	450
	451	December, 2001; by Nicholas Clark: make timestr() recognise the style 'none'
	452	and return an empty string. If cmpthese is calling timethese, make it pass the
	453	style in. (so that 'none' will suppress output). Make sub new dump its
	454	debugging output to STDERR, to be consistent with everything else.
	455	All bugs found while writing a regression test.
	456
	457	September, 2002; by Jarkko Hietaniemi: add ':hireswallclock' special tag.
	458
	459	February, 2004; by Chia-liang Kao: make cmpthese and timestr use time
	460	statistics for children instead of parent when the style is 'nop'.
	461
	462	November, 2007; by Christophe Grosjean: make cmpthese and timestr compute
	463	time consistently with style argument, default is 'all' not 'noc' any more.
	464
	465	=cut
	466
	467	# evaluate something in a clean lexical environment
	468	sub _doeval { no strict; eval shift }
	469
	470	#
	471	# put any lexicals at file scope AFTER here
	472	#
	473
	474	use Carp;
	475	use Exporter;
	476
	477	our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS, $VERSION);
	478
	479	@ISA=qw(Exporter);
	480	@EXPORT=qw(timeit timethis timethese timediff timestr);
	481	@EXPORT_OK=qw(timesum cmpthese countit
	482	clearcache clearallcache disablecache enablecache);
	483	%EXPORT_TAGS=( all => [ @EXPORT, @EXPORT_OK ] ) ;
	484
	485	$VERSION = 1.19;
	486
	487	# --- ':hireswallclock' special handling
	488
	489	my $hirestime;
	490
	491	sub mytime () { time }
	492
	493	init();
	494
	495	sub BEGIN {
	496	if (eval 'require Time::HiRes') {
	497	import Time::HiRes qw(time);
	498	$hirestime = \&Time::HiRes::time;
	499	}
	500	}
	501
	502	sub import {
	503	my $class = shift;
	504	if (grep { $_ eq ":hireswallclock" } @_) {
	505	@_ = grep { $_ ne ":hireswallclock" } @_;
	506	local $^W=0;
	507	*mytime = $hirestime if defined $hirestime;
	508	}
	509	Benchmark->export_to_level(1, $class, @_);
	510	}
	511
	512	our($Debug, $Min_Count, $Min_CPU, $Default_Format, $Default_Style,
	513	%_Usage, %Cache, $Do_Cache);
	514
	515	sub init {
	516	$Debug = 0;
	517	$Min_Count = 4;
	518	$Min_CPU = 0.4;
	519	$Default_Format = '5.2f';
	520	$Default_Style = 'auto';
	521	# The cache can cause a slight loss of sys time accuracy. If a
	522	# user does many tests (>10) with very large counts (>10000)
	523	# or works on a very slow machine the cache may be useful.
	524	disablecache();
	525	clearallcache();
	526	}
	527
	528	sub debug { $Debug = ($_[1] != 0); }
	529
	530	sub usage {
	531	my $calling_sub = (caller(1))[3];
	532	$calling_sub =~ s/^Benchmark:://;
	533	return $_Usage{$calling_sub} \|\| '';
	534	}
	535
	536	# The cache needs two branches: 's' for strings and 'c' for code. The
	537	# empty loop is different in these two cases.
	538
	539	$_Usage{clearcache} = <<'USAGE';
	540	usage: clearcache($count);
	541	USAGE
	542
	543	sub clearcache {
	544	die usage unless @_ == 1;
	545	delete $Cache{"$_[0]c"}; delete $Cache{"$_[0]s"};
	546	}
	547
	548	$_Usage{clearallcache} = <<'USAGE';
	549	usage: clearallcache();
	550	USAGE
	551
	552	sub clearallcache {
	553	die usage if @_;
	554	%Cache = ();
	555	}
	556
	557	$_Usage{enablecache} = <<'USAGE';
	558	usage: enablecache();
	559	USAGE
	560
	561	sub enablecache {
	562	die usage if @_;
	563	$Do_Cache = 1;
	564	}
	565
	566	$_Usage{disablecache} = <<'USAGE';
	567	usage: disablecache();
	568	USAGE
	569
	570	sub disablecache {
	571	die usage if @_;
	572	$Do_Cache = 0;
	573	}
	574
	575
	576	# --- Functions to process the 'time' data type
	577
	578	sub new { my @t = (mytime, times, @_ == 2 ? $_[1] : 0);
	579	print STDERR "new=@t\n" if $Debug;
	580	bless \@t; }
	581
	582	sub cpu_p { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps ; }
	583	sub cpu_c { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $cu+$cs ; }
	584	sub cpu_a { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps+$cu+$cs ; }
	585	sub real { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $r ; }
	586	sub iters { $_[0]->[5] ; }
	587
	588
	589	$_Usage{timediff} = <<'USAGE';
	590	usage: $result_diff = timediff($result1, $result2);
	591	USAGE
	592
	593	sub timediff {
	594	my($a, $b) = @_;
	595
	596	die usage unless ref $a and ref $b;
	597
	598	my @r;
	599	for (my $i=0; $i < @$a; ++$i) {
	600	push(@r, $a->[$i] - $b->[$i]);
	601	}
	602	#die "Bad timediff(): ($r[1] + $r[2]) <= 0 (@$a[1,2]\|@$b[1,2])\n"
	603	# if ($r[1] + $r[2]) < 0;
	604	bless \@r;
	605	}
	606
	607	$_Usage{timesum} = <<'USAGE';
	608	usage: $sum = timesum($result1, $result2);
	609	USAGE
	610
	611	sub timesum {
	612	my($a, $b) = @_;
	613
	614	die usage unless ref $a and ref $b;
	615
	616	my @r;
	617	for (my $i=0; $i < @$a; ++$i) {
	618	push(@r, $a->[$i] + $b->[$i]);
	619	}
	620	bless \@r;
	621	}
	622
	623
	624	$_Usage{timestr} = <<'USAGE';
	625	usage: $formatted_result = timestr($result1);
	626	USAGE
	627
	628	sub timestr {
	629	my($tr, $style, $f) = @_;
	630
	631	die usage unless ref $tr;
	632
	633	my @t = @$tr;
	634	warn "bad time value (@t)" unless @t==6;
	635	my($r, $pu, $ps, $cu, $cs, $n) = @t;
	636	my($pt, $ct, $tt) = ($tr->cpu_p, $tr->cpu_c, $tr->cpu_a);
	637	$f = $Default_Format unless defined $f;
	638	# format a time in the required style, other formats may be added here
	639	$style \|\|= $Default_Style;
	640	return '' if $style eq 'none';
	641	$style = ($ct>0) ? 'all' : 'noc' if $style eq 'auto';
	642	my $s = "@t $style"; # default for unknown style
	643	my $w = $hirestime ? "%2g" : "%2d";
	644	$s = sprintf("$w wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
	645	$r,$pu,$ps,$cu,$cs,$tt) if $style eq 'all';
	646	$s = sprintf("$w wallclock secs (%$f usr + %$f sys = %$f CPU)",
	647	$r,$pu,$ps,$pt) if $style eq 'noc';
	648	$s = sprintf("$w wallclock secs (%$f cusr + %$f csys = %$f CPU)",
	649	$r,$cu,$cs,$ct) if $style eq 'nop';
	650	my $elapsed = do {
	651	if ($style eq 'nop') {$cu+$cs}
	652	elsif ($style eq 'noc') {$pu+$ps}
	653	else {$cu+$cs+$pu+$ps}
	654	};
	655	$s .= sprintf(" @ %$f/s (n=$n)",$n/($elapsed)) if $n && $elapsed;
	656	$s;
	657	}
	658
	659	sub timedebug {
	660	my($msg, $t) = @_;
	661	print STDERR "$msg",timestr($t),"\n" if $Debug;
	662	}
	663
	664	# --- Functions implementing low-level support for timing loops
	665
	666	$_Usage{runloop} = <<'USAGE';
	667	usage: runloop($number, [$string \| $coderef])
	668	USAGE
	669
	670	sub runloop {
	671	my($n, $c) = @_;
	672
	673	$n+=0; # force numeric now, so garbage won't creep into the eval
	674	croak "negative loopcount $n" if $n<0;
	675	confess usage unless defined $c;
	676	my($t0, $t1, $td); # before, after, difference
	677
	678	# find package of caller so we can execute code there
	679	my($curpack) = caller(0);
	680	my($i, $pack)= 0;
	681	while (($pack) = caller(++$i)) {
	682	last if $pack ne $curpack;
	683	}
	684
	685	my ($subcode, $subref);
	686	if (ref $c eq 'CODE') {
	687	$subcode = "sub { for (1 .. $n) { local \$_; package $pack; &\$c; } }";
	688	$subref = eval $subcode;
	689	}
	690	else {
	691	$subcode = "sub { for (1 .. $n) { local \$_; package $pack; $c;} }";
	692	$subref = _doeval($subcode);
	693	}
	694	croak "runloop unable to compile '$c': $@\ncode: $subcode\n" if $@;
	695	print STDERR "runloop $n '$subcode'\n" if $Debug;
	696
	697	# Wait for the user timer to tick. This makes the error range more like
	698	# -0.01, +0. If we don't wait, then it's more like -0.01, +0.01. This
	699	# may not seem important, but it significantly reduces the chances of
	700	# getting a too low initial $n in the initial, 'find the minimum' loop
	701	# in &countit. This, in turn, can reduce the number of calls to
	702	# &runloop a lot, and thus reduce additive errors.
	703	#
	704	# Note that its possible for the act of reading the system clock to
	705	# burn lots of system CPU while we burn very little user clock in the
	706	# busy loop, which can cause the loop to run for a very long wall time.
	707	# So gradually ramp up the duration of the loop. See RT #122003
	708	#
	709	my $tbase = Benchmark->new(0)->[1];
	710	my $limit = 1;
	711	while ( ( $t0 = Benchmark->new(0) )->[1] == $tbase ) {
	712	for (my $i=0; $i < $limit; $i++) { my $x = $i / 1.5 } # burn user CPU
	713	$limit *= 1.1;
	714	}
	715	$subref->();
	716	$t1 = Benchmark->new($n);
	717	$td = &timediff($t1, $t0);
	718	timedebug("runloop:",$td);
	719	$td;
	720	}
	721
	722	$_Usage{timeit} = <<'USAGE';
	723	usage: $result = timeit($count, 'code' ); or
	724	$result = timeit($count, sub { code } );
	725	USAGE
	726
	727	sub timeit {
	728	my($n, $code) = @_;
	729	my($wn, $wc, $wd);
	730
	731	die usage unless defined $code and
	732	(!ref $code or ref $code eq 'CODE');
	733
	734	printf STDERR "timeit $n $code\n" if $Debug;
	735	my $cache_key = $n . ( ref( $code ) ? 'c' : 's' );
	736	if ($Do_Cache && exists $Cache{$cache_key} ) {
	737	$wn = $Cache{$cache_key};
	738	} else {
	739	$wn = &runloop($n, ref( $code ) ? sub { } : '' );
	740	# Can't let our baseline have any iterations, or they get subtracted
	741	# out of the result.
	742	$wn->[5] = 0;
	743	$Cache{$cache_key} = $wn;
	744	}
	745
	746	$wc = &runloop($n, $code);
	747
	748	$wd = timediff($wc, $wn);
	749	timedebug("timeit: ",$wc);
	750	timedebug(" - ",$wn);
	751	timedebug(" = ",$wd);
	752
	753	$wd;
	754	}
	755
	756
	757	my $default_for = 3;
	758	my $min_for = 0.1;
	759
	760
	761	$_Usage{countit} = <<'USAGE';
	762	usage: $result = countit($time, 'code' ); or
	763	$result = countit($time, sub { code } );
	764	USAGE
	765
	766	sub countit {
	767	my ( $tmax, $code ) = @_;
	768
	769	die usage unless @_;
	770
	771	if ( not defined $tmax or $tmax == 0 ) {
	772	$tmax = $default_for;
	773	} elsif ( $tmax < 0 ) {
	774	$tmax = -$tmax;
	775	}
	776
	777	die "countit($tmax, ...): timelimit cannot be less than $min_for.\n"
	778	if $tmax < $min_for;
	779
	780	my ($n, $tc);
	781
	782	# First find the minimum $n that gives a significant timing.
	783	my $zeros=0;
	784	for ($n = 1; ; $n *= 2 ) {
	785	my $t0 = Benchmark->new(0);
	786	my $td = timeit($n, $code);
	787	my $t1 = Benchmark->new(0);
	788	$tc = $td->[1] + $td->[2];
	789	if ( $tc <= 0 and $n > 1024 ) {
	790	my $d = timediff($t1, $t0);
	791	# note that $d is the total CPU time taken to call timeit(),
	792	# while $tc is is difference in CPU secs between the empty run
	793	# and the code run. If the code is trivial, its possible
	794	# for $d to get large while $tc is still zero (or slightly
	795	# negative). Bail out once timeit() starts taking more than a
	796	# few seconds without noticeable difference.
	797	if ($d->[1] + $d->[2] > 8
	798	\|\| ++$zeros > 16)
	799	{
	800	die "Timing is consistently zero in estimation loop, cannot benchmark. N=$n\n";
	801	}
	802	} else {
	803	$zeros = 0;
	804	}
	805	last if $tc > 0.1;
	806	}
	807
	808	my $nmin = $n;
	809
	810	# Get $n high enough that we can guess the final $n with some accuracy.
	811	my $tpra = 0.1 * $tmax; # Target/time practice.
	812	while ( $tc < $tpra ) {
	813	# The 5% fudge is to keep us from iterating again all
	814	# that often (this speeds overall responsiveness when $tmax is big
	815	# and we guess a little low). This does not noticeably affect
	816	# accuracy since we're not counting these times.
	817	$n = int( $tpra * 1.05 * $n / $tc ); # Linear approximation.
	818	my $td = timeit($n, $code);
	819	my $new_tc = $td->[1] + $td->[2];
	820	# Make sure we are making progress.
	821	$tc = $new_tc > 1.2 * $tc ? $new_tc : 1.2 * $tc;
	822	}
	823
	824	# Now, do the 'for real' timing(s), repeating until we exceed
	825	# the max.
	826	my $ntot = 0;
	827	my $rtot = 0;
	828	my $utot = 0.0;
	829	my $stot = 0.0;
	830	my $cutot = 0.0;
	831	my $cstot = 0.0;
	832	my $ttot = 0.0;
	833
	834	# The 5% fudge is because $n is often a few % low even for routines
	835	# with stable times and avoiding extra timeit()s is nice for
	836	# accuracy's sake.
	837	$n = int( $n * ( 1.05 * $tmax / $tc ) );
	838	$zeros=0;
	839	while () {
	840	my $td = timeit($n, $code);
	841	$ntot += $n;
	842	$rtot += $td->[0];
	843	$utot += $td->[1];
	844	$stot += $td->[2];
	845	$cutot += $td->[3];
	846	$cstot += $td->[4];
	847	$ttot = $utot + $stot;
	848	last if $ttot >= $tmax;
	849	if ( $ttot <= 0 ) {
	850	++$zeros > 16
	851	and die "Timing is consistently zero, cannot benchmark. N=$n\n";
	852	} else {
	853	$zeros = 0;
	854	}
	855	$ttot = 0.01 if $ttot < 0.01;
	856	my $r = $tmax / $ttot - 1; # Linear approximation.
	857	$n = int( $r * $ntot );
	858	$n = $nmin if $n < $nmin;
	859	}
	860
	861	return bless [ $rtot, $utot, $stot, $cutot, $cstot, $ntot ];
	862	}
	863
	864	# --- Functions implementing high-level time-then-print utilities
	865
	866	sub n_to_for {
	867	my $n = shift;
	868	return $n == 0 ? $default_for : $n < 0 ? -$n : undef;
	869	}
	870
	871	$_Usage{timethis} = <<'USAGE';
	872	usage: $result = timethis($time, 'code' ); or
	873	$result = timethis($time, sub { code } );
	874	USAGE
	875
	876	sub timethis{
	877	my($n, $code, $title, $style) = @_;
	878	my($t, $forn);
	879
	880	die usage unless defined $code and
	881	(!ref $code or ref $code eq 'CODE');
	882
	883	if ( $n > 0 ) {
	884	croak "non-integer loopcount $n, stopped" if int($n)<$n;
	885	$t = timeit($n, $code);
	886	$title = "timethis $n" unless defined $title;
	887	} else {
	888	my $fort = n_to_for( $n );
	889	$t = countit( $fort, $code );
	890	$title = "timethis for $fort" unless defined $title;
	891	$forn = $t->[-1];
	892	}
	893	local $\| = 1;
	894	$style = "" unless defined $style;
	895	printf("%10s: ", $title) unless $style eq 'none';
	896	print timestr($t, $style, $Default_Format),"\n" unless $style eq 'none';
	897
	898	$n = $forn if defined $forn;
	899
	900	# A conservative warning to spot very silly tests.
	901	# Don't assume that your benchmark is ok simply because
	902	# you don't get this warning!
	903	print " (warning: too few iterations for a reliable count)\n"
	904	if $n < $Min_Count
	905	\|\| ($t->real < 1 && $n < 1000)
	906	\|\| $t->cpu_a < $Min_CPU;
	907	$t;
	908	}
	909
	910
	911	$_Usage{timethese} = <<'USAGE';
	912	usage: timethese($count, { Name1 => 'code1', ... }); or
	913	timethese($count, { Name1 => sub { code1 }, ... });
	914	USAGE
	915
	916	sub timethese{
	917	my($n, $alt, $style) = @_;
	918	die usage unless ref $alt eq 'HASH';
	919
	920	my @names = sort keys %$alt;
	921	$style = "" unless defined $style;
	922	print "Benchmark: " unless $style eq 'none';
	923	if ( $n > 0 ) {
	924	croak "non-integer loopcount $n, stopped" if int($n)<$n;
	925	print "timing $n iterations of" unless $style eq 'none';
	926	} else {
	927	print "running" unless $style eq 'none';
	928	}
	929	print " ", join(', ',@names) unless $style eq 'none';
	930	unless ( $n > 0 ) {
	931	my $for = n_to_for( $n );
	932	print ", each" if $n > 1 && $style ne 'none';
	933	print " for at least $for CPU seconds" unless $style eq 'none';
	934	}
	935	print "...\n" unless $style eq 'none';
	936
	937	# we could save the results in an array and produce a summary here
	938	# sum, min, max, avg etc etc
	939	my %results;
	940	foreach my $name (@names) {
	941	$results{$name} = timethis ($n, $alt -> {$name}, $name, $style);
	942	}
	943
	944	return \%results;
	945	}
	946
	947
	948	$_Usage{cmpthese} = <<'USAGE';
	949	usage: cmpthese($count, { Name1 => 'code1', ... }); or
	950	cmpthese($count, { Name1 => sub { code1 }, ... }); or
	951	cmpthese($result, $style);
	952	USAGE
	953
	954	sub cmpthese{
	955	my ($results, $style);
	956
	957	# $count can be a blessed object.
	958	if ( ref $_[0] eq 'HASH' ) {
	959	($results, $style) = @_;
	960	}
	961	else {
	962	my($count, $code) = @_[0,1];
	963	$style = $_[2] if defined $_[2];
	964
	965	die usage unless ref $code eq 'HASH';
	966
	967	$results = timethese($count, $code, ($style \|\| "none"));
	968	}
	969
	970	$style = "" unless defined $style;
	971
	972	# Flatten in to an array of arrays with the name as the first field
	973	my @vals = map{ [ $_, @{$results->{$_}} ] } keys %$results;
	974
	975	for (@vals) {
	976	# The epsilon fudge here is to prevent div by 0. Since clock
	977	# resolutions are much larger, it's below the noise floor.
	978	my $elapsed = do {
	979	if ($style eq 'nop') {$_->[4]+$_->[5]}
	980	elsif ($style eq 'noc') {$_->[2]+$_->[3]}
	981	else {$_->[2]+$_->[3]+$_->[4]+$_->[5]}
	982	};
	983	my $rate = $_->[6]/(($elapsed)+0.000000000000001);
	984	$_->[7] = $rate;
	985	}
	986
	987	# Sort by rate
	988	@vals = sort { $a->[7] <=> $b->[7] } @vals;
	989
	990	# If more than half of the rates are greater than one...
	991	my $display_as_rate = @vals ? ($vals[$#vals>>1]->[7] > 1) : 0;
	992
	993	my @rows;
	994	my @col_widths;
	995
	996	my @top_row = (
	997	'',
	998	$display_as_rate ? 'Rate' : 's/iter',
	999	map { $_->[0] } @vals
	1000	);
	1001
	1002	push @rows, \@top_row;
	1003	@col_widths = map { length( $_ ) } @top_row;
	1004
	1005	# Build the data rows
	1006	# We leave the last column in even though it never has any data. Perhaps
	1007	# it should go away. Also, perhaps a style for a single column of
	1008	# percentages might be nice.
	1009	for my $row_val ( @vals ) {
	1010	my @row;
	1011
	1012	# Column 0 = test name
	1013	push @row, $row_val->[0];
	1014	$col_widths[0] = length( $row_val->[0] )
	1015	if length( $row_val->[0] ) > $col_widths[0];
	1016
	1017	# Column 1 = performance
	1018	my $row_rate = $row_val->[7];
	1019
	1020	# We assume that we'll never get a 0 rate.
	1021	my $rate = $display_as_rate ? $row_rate : 1 / $row_rate;
	1022
	1023	# Only give a few decimal places before switching to sci. notation,
	1024	# since the results aren't usually that accurate anyway.
	1025	my $format =
	1026	$rate >= 100 ?
	1027	"%0.0f" :
	1028	$rate >= 10 ?
	1029	"%0.1f" :
	1030	$rate >= 1 ?
	1031	"%0.2f" :
	1032	$rate >= 0.1 ?
	1033	"%0.3f" :
	1034	"%0.2e";
	1035
	1036	$format .= "/s"
	1037	if $display_as_rate;
	1038
	1039	my $formatted_rate = sprintf( $format, $rate );
	1040	push @row, $formatted_rate;
	1041	$col_widths[1] = length( $formatted_rate )
	1042	if length( $formatted_rate ) > $col_widths[1];
	1043
	1044	# Columns 2..N = performance ratios
	1045	my $skip_rest = 0;
	1046	for ( my $col_num = 0 ; $col_num < @vals ; ++$col_num ) {
	1047	my $col_val = $vals[$col_num];
	1048	my $out;
	1049	if ( $skip_rest ) {
	1050	$out = '';
	1051	}
	1052	elsif ( $col_val->[0] eq $row_val->[0] ) {
	1053	$out = "--";
	1054	# $skip_rest = 1;
	1055	}
	1056	else {
	1057	my $col_rate = $col_val->[7];
	1058	$out = sprintf( "%.0f%%", 100*$row_rate/$col_rate - 100 );
	1059	}
	1060	push @row, $out;
	1061	$col_widths[$col_num+2] = length( $out )
	1062	if length( $out ) > $col_widths[$col_num+2];
	1063
	1064	# A little weirdness to set the first column width properly
	1065	$col_widths[$col_num+2] = length( $col_val->[0] )
	1066	if length( $col_val->[0] ) > $col_widths[$col_num+2];
	1067	}
	1068	push @rows, \@row;
	1069	}
	1070
	1071	return \@rows if $style eq "none";
	1072
	1073	# Equalize column widths in the chart as much as possible without
	1074	# exceeding 80 characters. This does not use or affect cols 0 or 1.
	1075	my @sorted_width_refs =
	1076	sort { $$a <=> $$b } map { \$_ } @col_widths[2..$#col_widths];
	1077	my $max_width = ${$sorted_width_refs[-1]};
	1078
	1079	my $total = @col_widths - 1 ;
	1080	for ( @col_widths ) { $total += $_ }
	1081
	1082	STRETCHER:
	1083	while ( $total < 80 ) {
	1084	my $min_width = ${$sorted_width_refs[0]};
	1085	last
	1086	if $min_width == $max_width;
	1087	for ( @sorted_width_refs ) {
	1088	last
	1089	if $$_ > $min_width;
	1090	++$$_;
	1091	++$total;
	1092	last STRETCHER
	1093	if $total >= 80;
	1094	}
	1095	}
	1096
	1097	# Dump the output
	1098	my $format = join( ' ', map { "%${_}s" } @col_widths ) . "\n";
	1099	substr( $format, 1, 0 ) = '-';
	1100	for ( @rows ) {
	1101	printf $format, @$_;
	1102	}
	1103
	1104	return \@rows ;
	1105	}
	1106
	1107
	1108	1;