perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	package Benchmark;
	2
	3	use strict;
	4
	5
	6	=head1 NAME
	7
	8	Benchmark - benchmark running times of Perl code
	9
	10	=head1 SYNOPSIS
	11
	12	use Benchmark qw(:all) ;
	13
	14	timethis ($count, "code");
	15
	16	# Use Perl code in strings...
	17	timethese($count, {
	18	'Name1' => '...code1...',
	19	'Name2' => '...code2...',
	20	});
	21
	22	# ... or use subroutine references.
	23	timethese($count, {
	24	'Name1' => sub { ...code1... },
	25	'Name2' => sub { ...code2... },
	26	});
	27
	28	# cmpthese can be used both ways as well
	29	cmpthese($count, {
	30	'Name1' => '...code1...',
	31	'Name2' => '...code2...',
	32	});
	33
	34	cmpthese($count, {
	35	'Name1' => sub { ...code1... },
	36	'Name2' => sub { ...code2... },
	37	});
	38
	39	# ...or in two stages
	40	$results = timethese($count,
	41	{
	42	'Name1' => sub { ...code1... },
	43	'Name2' => sub { ...code2... },
	44	},
	45	'none'
	46	);
	47	cmpthese( $results ) ;
	48
	49	$t = timeit($count, '...other code...')
	50	print "$count loops of other code took:",timestr($t),"\n";
	51
	52	$t = countit($time, '...other code...')
	53	$count = $t->iters ;
	54	print "$count loops of other code took:",timestr($t),"\n";
	55
	56	# enable hires wallclock timing if possible
	57	use Benchmark ':hireswallclock';
	58
	59	=head1 DESCRIPTION
	60
	61	The Benchmark module encapsulates a number of routines to help you
	62	figure out how long it takes to execute some code.
	63
	64	timethis - run a chunk of code several times
	65
	66	timethese - run several chunks of code several times
	67
	68	cmpthese - print results of timethese as a comparison chart
	69
	70	timeit - run a chunk of code and see how long it goes
	71
	72	countit - see how many times a chunk of code runs in a given time
	73
	74
	75	=head2 Methods
	76
	77	=over 10
	78
	79	=item new
	80
	81	Returns the current time. Example:
	82
	83	use Benchmark;
	84	$t0 = Benchmark->new;
	85	# ... your code here ...
	86	$t1 = Benchmark->new;
	87	$td = timediff($t1, $t0);
	88	print "the code took:",timestr($td),"\n";
	89
	90	=item debug
	91
	92	Enables or disable debugging by setting the C<$Benchmark::Debug> flag:
	93
	94	Benchmark->debug(1);
	95	$t = timeit(10, ' 5 ** $Global ');
	96	Benchmark->debug(0);
	97
	98	=item iters
	99
	100	Returns the number of iterations.
	101
	102	=back
	103
	104	=head2 Standard Exports
	105
	106	The following routines will be exported into your namespace
	107	if you use the Benchmark module:
	108
	109	=over 10
	110
	111	=item timeit(COUNT, CODE)
	112
	113	Arguments: COUNT is the number of times to run the loop, and CODE is
	114	the code to run. CODE may be either a code reference or a string to
	115	be eval'd; either way it will be run in the caller's package.
	116
	117	Returns: a Benchmark object.
	118
	119	=item timethis ( COUNT, CODE, [ TITLE, [ STYLE ]] )
	120
	121	Time COUNT iterations of CODE. CODE may be a string to eval or a
	122	code reference; either way the CODE will run in the caller's package.
	123	Results will be printed to STDOUT as TITLE followed by the times.
	124	TITLE defaults to "timethis COUNT" if none is provided. STYLE
	125	determines the format of the output, as described for timestr() below.
	126
	127	The COUNT can be zero or negative: this means the I<minimum number of
	128	CPU seconds> to run. A zero signifies the default of 3 seconds. For
	129	example to run at least for 10 seconds:
	130
	131	timethis(-10, $code)
	132
	133	or to run two pieces of code tests for at least 3 seconds:
	134
	135	timethese(0, { test1 => '...', test2 => '...'})
	136
	137	CPU seconds is, in UNIX terms, the user time plus the system time of
	138	the process itself, as opposed to the real (wallclock) time and the
	139	time spent by the child processes. Less than 0.1 seconds is not
	140	accepted (-0.01 as the count, for example, will cause a fatal runtime
	141	exception).
	142
	143	Note that the CPU seconds is the B<minimum> time: CPU scheduling and
	144	other operating system factors may complicate the attempt so that a
	145	little bit more time is spent. The benchmark output will, however,
	146	also tell the number of C<$code> runs/second, which should be a more
	147	interesting number than the actually spent seconds.
	148
	149	Returns a Benchmark object.
	150
	151	=item timethese ( COUNT, CODEHASHREF, [ STYLE ] )
	152
	153	The CODEHASHREF is a reference to a hash containing names as keys
	154	and either a string to eval or a code reference for each value.
	155	For each (KEY, VALUE) pair in the CODEHASHREF, this routine will
	156	call
	157
	158	timethis(COUNT, VALUE, KEY, STYLE)
	159
	160	The routines are called in string comparison order of KEY.
	161
	162	The COUNT can be zero or negative, see timethis().
	163
	164	Returns a hash reference of Benchmark objects, keyed by name.
	165
	166	=item timediff ( T1, T2 )
	167
	168	Returns the difference between two Benchmark times as a Benchmark
	169	object suitable for passing to timestr().
	170
	171	=item timestr ( TIMEDIFF, [ STYLE, [ FORMAT ] ] )
	172
	173	Returns a string that formats the times in the TIMEDIFF object in
	174	the requested STYLE. TIMEDIFF is expected to be a Benchmark object
	175	similar to that returned by timediff().
	176
	177	STYLE can be any of 'all', 'none', 'noc', 'nop' or 'auto'. 'all' shows
	178	each of the 5 times available ('wallclock' time, user time, system time,
	179	user time of children, and system time of children). 'noc' shows all
	180	except the two children times. 'nop' shows only wallclock and the
	181	two children times. 'auto' (the default) will act as 'all' unless
	182	the children times are both zero, in which case it acts as 'noc'.
	183	'none' prevents output.
	184
	185	FORMAT is the L<printf(3)>-style format specifier (without the
	186	leading '%') to use to print the times. It defaults to '5.2f'.
	187
	188	=back
	189
	190	=head2 Optional Exports
	191
	192	The following routines will be exported into your namespace
	193	if you specifically ask that they be imported:
	194
	195	=over 10
	196
	197	=item clearcache ( COUNT )
	198
	199	Clear the cached time for COUNT rounds of the null loop.
	200
	201	=item clearallcache ( )
	202
	203	Clear all cached times.
	204
	205	=item cmpthese ( COUNT, CODEHASHREF, [ STYLE ] )
	206
	207	=item cmpthese ( RESULTSHASHREF, [ STYLE ] )
	208
	209	Optionally calls timethese(), then outputs comparison chart. This:
	210
	211	cmpthese( -1, { a => "++\$i", b => "\$i *= 2" } ) ;
	212
	213	outputs a chart like:
	214
	215	Rate b a
	216	b 2831802/s -- -61%
	217	a 7208959/s 155% --
	218
	219	This chart is sorted from slowest to fastest, and shows the percent speed
	220	difference between each pair of tests.
	221
	222	C<cmpthese> can also be passed the data structure that timethese() returns:
	223
	224	$results = timethese( -1,
	225	{ a => "++\$i", b => "\$i *= 2" } ) ;
	226	cmpthese( $results );
	227
	228	in case you want to see both sets of results.
	229	If the first argument is an unblessed hash reference,
	230	that is RESULTSHASHREF; otherwise that is COUNT.
	231
	232	Returns a reference to an ARRAY of rows, each row is an ARRAY of cells from the
	233	above chart, including labels. This:
	234
	235	my $rows = cmpthese( -1,
	236	{ a => '++$i', b => '$i *= 2' }, "none" );
	237
	238	returns a data structure like:
	239
	240	[
	241	[ '', 'Rate', 'b', 'a' ],
	242	[ 'b', '2885232/s', '--', '-59%' ],
	243	[ 'a', '7099126/s', '146%', '--' ],
	244	]
	245
	246	B<NOTE>: This result value differs from previous versions, which returned
	247	the C<timethese()> result structure. If you want that, just use the two
	248	statement C<timethese>...C<cmpthese> idiom shown above.
	249
	250	Incidentally, note the variance in the result values between the two examples;
	251	this is typical of benchmarking. If this were a real benchmark, you would
	252	probably want to run a lot more iterations.
	253
	254	=item countit(TIME, CODE)
	255
	256	Arguments: TIME is the minimum length of time to run CODE for, and CODE is
	257	the code to run. CODE may be either a code reference or a string to
	258	be eval'd; either way it will be run in the caller's package.
	259
	260	TIME is I<not> negative. countit() will run the loop many times to
	261	calculate the speed of CODE before running it for TIME. The actual
	262	time run for will usually be greater than TIME due to system clock
	263	resolution, so it's best to look at the number of iterations divided
	264	by the times that you are concerned with, not just the iterations.
	265
	266	Returns: a Benchmark object.
	267
	268	=item disablecache ( )
	269
	270	Disable caching of timings for the null loop. This will force Benchmark
	271	to recalculate these timings for each new piece of code timed.
	272
	273	=item enablecache ( )
	274
	275	Enable caching of timings for the null loop. The time taken for COUNT
	276	rounds of the null loop will be calculated only once for each
	277	different COUNT used.
	278
	279	=item timesum ( T1, T2 )
	280
	281	Returns the sum of two Benchmark times as a Benchmark object suitable
	282	for passing to timestr().
	283
	284	=back
	285
	286	=head2 :hireswallclock
	287
	288	If the Time::HiRes module has been installed, you can specify the
	289	special tag C<:hireswallclock> for Benchmark (if Time::HiRes is not
	290	available, the tag will be silently ignored). This tag will cause the
	291	wallclock time to be measured in microseconds, instead of integer
	292	seconds. Note though that the speed computations are still conducted
	293	in CPU time, not wallclock time.
	294
	295	=head1 Benchmark Object
	296
	297	Many of the functions in this module return a Benchmark object,
	298	or in the case of C<timethese()>, a reference to a hash, the values of
	299	which are Benchmark objects. This is useful if you want to store or
	300	further process results from Benchmark functions.
	301
	302	Internally the Benchmark object holds timing values,
	303	described in L</"NOTES"> below.
	304	The following methods can be used to access them:
	305
	306	=over 4
	307
	308	=item cpu_p
	309
	310	Total CPU (User + System) of the main (parent) process.
	311
	312	=item cpu_c
	313
	314	Total CPU (User + System) of any children processes.
	315
	316	=item cpu_a
	317
	318	Total CPU of parent and any children processes.
	319
	320	=item real
	321
	322	Real elapsed time "wallclock seconds".
	323
	324	=item iters
	325
	326	Number of iterations run.
	327
	328	=back
	329
	330	The following illustrates use of the Benchmark object:
	331
	332	$result = timethis(100000, sub { ... });
	333	print "total CPU = ", $result->cpu_a, "\n";
	334
	335	=head1 NOTES
	336
	337	The data is stored as a list of values from the time and times
	338	functions:
	339
	340	($real, $user, $system, $children_user, $children_system, $iters)
	341
	342	in seconds for the whole loop (not divided by the number of rounds).
	343
	344	The timing is done using time(3) and times(3).
	345
	346	Code is executed in the caller's package.
	347
	348	The time of the null loop (a loop with the same
	349	number of rounds but empty loop body) is subtracted
	350	from the time of the real loop.
	351
	352	The null loop times can be cached, the key being the
	353	number of rounds. The caching can be controlled using
	354	calls like these:
	355
	356	clearcache($key);
	357	clearallcache();
	358
	359	disablecache();
	360	enablecache();
	361
	362	Caching is off by default, as it can (usually slightly) decrease
	363	accuracy and does not usually noticeably affect runtimes.
	364
	365	=head1 EXAMPLES
	366
	367	For example,
	368
	369	use Benchmark qw( cmpthese ) ;
	370	$x = 3;
	371	cmpthese( -5, {
	372	a => sub{$x*$x},
	373	b => sub{$x**2},
	374	} );
	375
	376	outputs something like this:
	377
	378	Benchmark: running a, b, each for at least 5 CPU seconds...
	379	Rate b a
	380	b 1559428/s -- -62%
	381	a 4152037/s 166% --
	382
	383
	384	while
	385
	386	use Benchmark qw( timethese cmpthese ) ;
	387	$x = 3;
	388	$r = timethese( -5, {
	389	a => sub{$x*$x},
	390	b => sub{$x**2},
	391	} );
	392	cmpthese $r;
	393
	394	outputs something like this:
	395
	396	Benchmark: running a, b, each for at least 5 CPU seconds...
	397	a: 10 wallclock secs ( 5.14 usr + 0.13 sys = 5.27 CPU) @ 3835055.60/s (n=20210743)
	398	b: 5 wallclock secs ( 5.41 usr + 0.00 sys = 5.41 CPU) @ 1574944.92/s (n=8520452)
	399	Rate b a
	400	b 1574945/s -- -59%
	401	a 3835056/s 144% --
	402
	403
	404	=head1 INHERITANCE
	405
	406	Benchmark inherits from no other class, except of course
	407	from Exporter.
	408
	409	=head1 CAVEATS
	410
	411	Comparing eval'd strings with code references will give you
	412	inaccurate results: a code reference will show a slightly slower
	413	execution time than the equivalent eval'd string.
	414
	415	The real time timing is done using time(2) and
	416	the granularity is therefore only one second.
	417
	418	Short tests may produce negative figures because perl
	419	can appear to take longer to execute the empty loop
	420	than a short test; try:
	421
	422	timethis(100,'1');
	423
	424	The system time of the null loop might be slightly
	425	more than the system time of the loop with the actual
	426	code and therefore the difference might end up being E<lt> 0.
	427
	428	=head1 SEE ALSO
	429
	430	L<Devel::NYTProf> - a Perl code profiler
	431
	432	=head1 AUTHORS
	433
	434	Jarkko Hietaniemi <F<jhi@iki.fi>>, Tim Bunce <F<Tim.Bunce@ig.co.uk>>
	435
	436	=head1 MODIFICATION HISTORY
	437
	438	September 8th, 1994; by Tim Bunce.
	439
	440	March 28th, 1997; by Hugo van der Sanden: added support for code
	441	references and the already documented 'debug' method; revamped
	442	documentation.
	443
	444	April 04-07th, 1997: by Jarkko Hietaniemi, added the run-for-some-time
	445	functionality.
	446
	447	September, 1999; by Barrie Slaymaker: math fixes and accuracy and
	448	efficiency tweaks. Added cmpthese(). A result is now returned from
	449	timethese(). Exposed countit() (was runfor()).
	450
	451	December, 2001; by Nicholas Clark: make timestr() recognise the style 'none'
	452	and return an empty string. If cmpthese is calling timethese, make it pass the
	453	style in. (so that 'none' will suppress output). Make sub new dump its
	454	debugging output to STDERR, to be consistent with everything else.
	455	All bugs found while writing a regression test.
	456
	457	September, 2002; by Jarkko Hietaniemi: add ':hireswallclock' special tag.
	458
	459	February, 2004; by Chia-liang Kao: make cmpthese and timestr use time
	460	statistics for children instead of parent when the style is 'nop'.
	461
	462	November, 2007; by Christophe Grosjean: make cmpthese and timestr compute
	463	time consistently with style argument, default is 'all' not 'noc' any more.
	464
	465	=cut
	466
	467	# evaluate something in a clean lexical environment
	468	sub _doeval { no strict; eval shift }
	469
	470	#
	471	# put any lexicals at file scope AFTER here
	472	#
	473
	474	use Carp;
	475	use Exporter;
	476
	477	our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS, $VERSION);
	478
	479	@ISA=qw(Exporter);
	480	@EXPORT=qw(timeit timethis timethese timediff timestr);
	481	@EXPORT_OK=qw(timesum cmpthese countit
	482	clearcache clearallcache disablecache enablecache);
	483	%EXPORT_TAGS=( all => [ @EXPORT, @EXPORT_OK ] ) ;
	484
	485	$VERSION = 1.22;
	486
	487	# --- ':hireswallclock' special handling
	488
	489	my $hirestime;
	490
	491	sub mytime () { time }
	492
	493	init();
	494
	495	sub BEGIN {
	496	if (eval 'require Time::HiRes') {
	497	import Time::HiRes qw(time);
	498	$hirestime = \&Time::HiRes::time;
	499	}
	500	}
	501
	502	sub import {
	503	my $class = shift;
	504	if (grep { $_ eq ":hireswallclock" } @_) {
	505	@_ = grep { $_ ne ":hireswallclock" } @_;
	506	local $^W=0;
	507	*mytime = $hirestime if defined $hirestime;
	508	}
	509	Benchmark->export_to_level(1, $class, @_);
	510	}
	511
	512	our($Debug, $Min_Count, $Min_CPU, $Default_Format, $Default_Style,
	513	%_Usage, %Cache, $Do_Cache);
	514
	515	sub init {
	516	$Debug = 0;
	517	$Min_Count = 4;
	518	$Min_CPU = 0.4;
	519	$Default_Format = '5.2f';
	520	$Default_Style = 'auto';
	521	# The cache can cause a slight loss of sys time accuracy. If a
	522	# user does many tests (>10) with very large counts (>10000)
	523	# or works on a very slow machine the cache may be useful.
	524	disablecache();
	525	clearallcache();
	526	}
	527
	528	sub debug { $Debug = ($_[1] != 0); }
	529
	530	sub usage {
	531	my $calling_sub = (caller(1))[3];
	532	$calling_sub =~ s/^Benchmark:://;
	533	return $_Usage{$calling_sub} \|\| '';
	534	}
	535
	536	# The cache needs two branches: 's' for strings and 'c' for code. The
	537	# empty loop is different in these two cases.
	538
	539	$_Usage{clearcache} = <<'USAGE';
	540	usage: clearcache($count);
	541	USAGE
	542
	543	sub clearcache {
	544	die usage unless @_ == 1;
	545	delete $Cache{"$_[0]c"}; delete $Cache{"$_[0]s"};
	546	}
	547
	548	$_Usage{clearallcache} = <<'USAGE';
	549	usage: clearallcache();
	550	USAGE
	551
	552	sub clearallcache {
	553	die usage if @_;
	554	%Cache = ();
	555	}
	556
	557	$_Usage{enablecache} = <<'USAGE';
	558	usage: enablecache();
	559	USAGE
	560
	561	sub enablecache {
	562	die usage if @_;
	563	$Do_Cache = 1;
	564	}
	565
	566	$_Usage{disablecache} = <<'USAGE';
	567	usage: disablecache();
	568	USAGE
	569
	570	sub disablecache {
	571	die usage if @_;
	572	$Do_Cache = 0;
	573	}
	574
	575
	576	# --- Functions to process the 'time' data type
	577
	578	sub new { my @t = (mytime, times, @_ == 2 ? $_[1] : 0);
	579	print STDERR "new=@t\n" if $Debug;
	580	bless \@t; }
	581
	582	sub cpu_p { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps ; }
	583	sub cpu_c { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $cu+$cs ; }
	584	sub cpu_a { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps+$cu+$cs ; }
	585	sub real { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $r ; }
	586	sub iters { $_[0]->[5] ; }
	587
	588	# return the sum of various times: which ones depending on $style
	589
	590	sub elapsed {
	591	my ($self, $style) = @_;
	592	$style = "" unless defined $style;
	593
	594	return $self->cpu_c if $style eq 'nop';
	595	return $self->cpu_p if $style eq 'noc';
	596	return $self->cpu_a;
	597	}
	598
	599
	600	$_Usage{timediff} = <<'USAGE';
	601	usage: $result_diff = timediff($result1, $result2);
	602	USAGE
	603
	604	sub timediff {
	605	my($a, $b) = @_;
	606
	607	die usage unless ref $a and ref $b;
	608
	609	my @r;
	610	for (my $i=0; $i < @$a; ++$i) {
	611	push(@r, $a->[$i] - $b->[$i]);
	612	}
	613	#die "Bad timediff(): ($r[1] + $r[2]) <= 0 (@$a[1,2]\|@$b[1,2])\n"
	614	# if ($r[1] + $r[2]) < 0;
	615	bless \@r;
	616	}
	617
	618	$_Usage{timesum} = <<'USAGE';
	619	usage: $sum = timesum($result1, $result2);
	620	USAGE
	621
	622	sub timesum {
	623	my($a, $b) = @_;
	624
	625	die usage unless ref $a and ref $b;
	626
	627	my @r;
	628	for (my $i=0; $i < @$a; ++$i) {
	629	push(@r, $a->[$i] + $b->[$i]);
	630	}
	631	bless \@r;
	632	}
	633
	634
	635	$_Usage{timestr} = <<'USAGE';
	636	usage: $formatted_result = timestr($result1);
	637	USAGE
	638
	639	sub timestr {
	640	my($tr, $style, $f) = @_;
	641
	642	die usage unless ref $tr;
	643
	644	my @t = @$tr;
	645	warn "bad time value (@t)" unless @t==6;
	646	my($r, $pu, $ps, $cu, $cs, $n) = @t;
	647	my($pt, $ct, $tt) = ($tr->cpu_p, $tr->cpu_c, $tr->cpu_a);
	648	$f = $Default_Format unless defined $f;
	649	# format a time in the required style, other formats may be added here
	650	$style \|\|= $Default_Style;
	651	return '' if $style eq 'none';
	652	$style = ($ct>0) ? 'all' : 'noc' if $style eq 'auto';
	653	my $s = "@t $style"; # default for unknown style
	654	my $w = $hirestime ? "%2g" : "%2d";
	655	$s = sprintf("$w wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
	656	$r,$pu,$ps,$cu,$cs,$tt) if $style eq 'all';
	657	$s = sprintf("$w wallclock secs (%$f usr + %$f sys = %$f CPU)",
	658	$r,$pu,$ps,$pt) if $style eq 'noc';
	659	$s = sprintf("$w wallclock secs (%$f cusr + %$f csys = %$f CPU)",
	660	$r,$cu,$cs,$ct) if $style eq 'nop';
	661	my $elapsed = $tr->elapsed($style);
	662	$s .= sprintf(" @ %$f/s (n=$n)",$n/($elapsed)) if $n && $elapsed;
	663	$s;
	664	}
	665
	666	sub timedebug {
	667	my($msg, $t) = @_;
	668	print STDERR "$msg",timestr($t),"\n" if $Debug;
	669	}
	670
	671	# --- Functions implementing low-level support for timing loops
	672
	673	$_Usage{runloop} = <<'USAGE';
	674	usage: runloop($number, [$string \| $coderef])
	675	USAGE
	676
	677	sub runloop {
	678	my($n, $c) = @_;
	679
	680	$n+=0; # force numeric now, so garbage won't creep into the eval
	681	croak "negative loopcount $n" if $n<0;
	682	confess usage unless defined $c;
	683	my($t0, $t1, $td); # before, after, difference
	684
	685	# find package of caller so we can execute code there
	686	my($curpack) = caller(0);
	687	my($i, $pack)= 0;
	688	while (($pack) = caller(++$i)) {
	689	last if $pack ne $curpack;
	690	}
	691
	692	my ($subcode, $subref);
	693	if (ref $c eq 'CODE') {
	694	$subcode = "sub { for (1 .. $n) { local \$_; package $pack; &\$c; } }";
	695	$subref = eval $subcode;
	696	}
	697	else {
	698	$subcode = "sub { for (1 .. $n) { local \$_; package $pack; $c;} }";
	699	$subref = _doeval($subcode);
	700	}
	701	croak "runloop unable to compile '$c': $@\ncode: $subcode\n" if $@;
	702	print STDERR "runloop $n '$subcode'\n" if $Debug;
	703
	704	# Wait for the user timer to tick. This makes the error range more like
	705	# -0.01, +0. If we don't wait, then it's more like -0.01, +0.01. This
	706	# may not seem important, but it significantly reduces the chances of
	707	# getting a too low initial $n in the initial, 'find the minimum' loop
	708	# in &countit. This, in turn, can reduce the number of calls to
	709	# &runloop a lot, and thus reduce additive errors.
	710	#
	711	# Note that its possible for the act of reading the system clock to
	712	# burn lots of system CPU while we burn very little user clock in the
	713	# busy loop, which can cause the loop to run for a very long wall time.
	714	# So gradually ramp up the duration of the loop. See RT #122003
	715	#
	716	my $tbase = Benchmark->new(0)->[1];
	717	my $limit = 1;
	718	while ( ( $t0 = Benchmark->new(0) )->[1] == $tbase ) {
	719	for (my $i=0; $i < $limit; $i++) { my $x = $i / 1.5 } # burn user CPU
	720	$limit *= 1.1;
	721	}
	722	$subref->();
	723	$t1 = Benchmark->new($n);
	724	$td = &timediff($t1, $t0);
	725	timedebug("runloop:",$td);
	726	$td;
	727	}
	728
	729	$_Usage{timeit} = <<'USAGE';
	730	usage: $result = timeit($count, 'code' ); or
	731	$result = timeit($count, sub { code } );
	732	USAGE
	733
	734	sub timeit {
	735	my($n, $code) = @_;
	736	my($wn, $wc, $wd);
	737
	738	die usage unless defined $code and
	739	(!ref $code or ref $code eq 'CODE');
	740
	741	printf STDERR "timeit $n $code\n" if $Debug;
	742	my $cache_key = $n . ( ref( $code ) ? 'c' : 's' );
	743	if ($Do_Cache && exists $Cache{$cache_key} ) {
	744	$wn = $Cache{$cache_key};
	745	} else {
	746	$wn = &runloop($n, ref( $code ) ? sub { } : '' );
	747	# Can't let our baseline have any iterations, or they get subtracted
	748	# out of the result.
	749	$wn->[5] = 0;
	750	$Cache{$cache_key} = $wn;
	751	}
	752
	753	$wc = &runloop($n, $code);
	754
	755	$wd = timediff($wc, $wn);
	756	timedebug("timeit: ",$wc);
	757	timedebug(" - ",$wn);
	758	timedebug(" = ",$wd);
	759
	760	$wd;
	761	}
	762
	763
	764	my $default_for = 3;
	765	my $min_for = 0.1;
	766
	767
	768	$_Usage{countit} = <<'USAGE';
	769	usage: $result = countit($time, 'code' ); or
	770	$result = countit($time, sub { code } );
	771	USAGE
	772
	773	sub countit {
	774	my ( $tmax, $code ) = @_;
	775
	776	die usage unless @_;
	777
	778	if ( not defined $tmax or $tmax == 0 ) {
	779	$tmax = $default_for;
	780	} elsif ( $tmax < 0 ) {
	781	$tmax = -$tmax;
	782	}
	783
	784	die "countit($tmax, ...): timelimit cannot be less than $min_for.\n"
	785	if $tmax < $min_for;
	786
	787	my ($n, $tc);
	788
	789	# First find the minimum $n that gives a significant timing.
	790	my $zeros=0;
	791	for ($n = 1; ; $n *= 2 ) {
	792	my $t0 = Benchmark->new(0);
	793	my $td = timeit($n, $code);
	794	my $t1 = Benchmark->new(0);
	795	$tc = $td->[1] + $td->[2];
	796	if ( $tc <= 0 and $n > 1024 ) {
	797	my $d = timediff($t1, $t0);
	798	# note that $d is the total CPU time taken to call timeit(),
	799	# while $tc is is difference in CPU secs between the empty run
	800	# and the code run. If the code is trivial, its possible
	801	# for $d to get large while $tc is still zero (or slightly
	802	# negative). Bail out once timeit() starts taking more than a
	803	# few seconds without noticeable difference.
	804	if ($d->[1] + $d->[2] > 8
	805	\|\| ++$zeros > 16)
	806	{
	807	die "Timing is consistently zero in estimation loop, cannot benchmark. N=$n\n";
	808	}
	809	} else {
	810	$zeros = 0;
	811	}
	812	last if $tc > 0.1;
	813	}
	814
	815	my $nmin = $n;
	816
	817	# Get $n high enough that we can guess the final $n with some accuracy.
	818	my $tpra = 0.1 * $tmax; # Target/time practice.
	819	while ( $tc < $tpra ) {
	820	# The 5% fudge is to keep us from iterating again all
	821	# that often (this speeds overall responsiveness when $tmax is big
	822	# and we guess a little low). This does not noticeably affect
	823	# accuracy since we're not counting these times.
	824	$n = int( $tpra * 1.05 * $n / $tc ); # Linear approximation.
	825	my $td = timeit($n, $code);
	826	my $new_tc = $td->[1] + $td->[2];
	827	# Make sure we are making progress.
	828	$tc = $new_tc > 1.2 * $tc ? $new_tc : 1.2 * $tc;
	829	}
	830
	831	# Now, do the 'for real' timing(s), repeating until we exceed
	832	# the max.
	833	my $ntot = 0;
	834	my $rtot = 0;
	835	my $utot = 0.0;
	836	my $stot = 0.0;
	837	my $cutot = 0.0;
	838	my $cstot = 0.0;
	839	my $ttot = 0.0;
	840
	841	# The 5% fudge is because $n is often a few % low even for routines
	842	# with stable times and avoiding extra timeit()s is nice for
	843	# accuracy's sake.
	844	$n = int( $n * ( 1.05 * $tmax / $tc ) );
	845	$zeros=0;
	846	while () {
	847	my $td = timeit($n, $code);
	848	$ntot += $n;
	849	$rtot += $td->[0];
	850	$utot += $td->[1];
	851	$stot += $td->[2];
	852	$cutot += $td->[3];
	853	$cstot += $td->[4];
	854	$ttot = $utot + $stot;
	855	last if $ttot >= $tmax;
	856	if ( $ttot <= 0 ) {
	857	++$zeros > 16
	858	and die "Timing is consistently zero, cannot benchmark. N=$n\n";
	859	} else {
	860	$zeros = 0;
	861	}
	862	$ttot = 0.01 if $ttot < 0.01;
	863	my $r = $tmax / $ttot - 1; # Linear approximation.
	864	$n = int( $r * $ntot );
	865	$n = $nmin if $n < $nmin;
	866	}
	867
	868	return bless [ $rtot, $utot, $stot, $cutot, $cstot, $ntot ];
	869	}
	870
	871	# --- Functions implementing high-level time-then-print utilities
	872
	873	sub n_to_for {
	874	my $n = shift;
	875	return $n == 0 ? $default_for : $n < 0 ? -$n : undef;
	876	}
	877
	878	$_Usage{timethis} = <<'USAGE';
	879	usage: $result = timethis($time, 'code' ); or
	880	$result = timethis($time, sub { code } );
	881	USAGE
	882
	883	sub timethis{
	884	my($n, $code, $title, $style) = @_;
	885	my($t, $forn);
	886
	887	die usage unless defined $code and
	888	(!ref $code or ref $code eq 'CODE');
	889
	890	if ( $n > 0 ) {
	891	croak "non-integer loopcount $n, stopped" if int($n)<$n;
	892	$t = timeit($n, $code);
	893	$title = "timethis $n" unless defined $title;
	894	} else {
	895	my $fort = n_to_for( $n );
	896	$t = countit( $fort, $code );
	897	$title = "timethis for $fort" unless defined $title;
	898	$forn = $t->[-1];
	899	}
	900	local $\| = 1;
	901	$style = "" unless defined $style;
	902	printf("%10s: ", $title) unless $style eq 'none';
	903	print timestr($t, $style, $Default_Format),"\n" unless $style eq 'none';
	904
	905	$n = $forn if defined $forn;
	906
	907	if ($t->elapsed($style) < 0) {
	908	# due to clock granularity and variable CPU speed and load,
	909	# on quick code with a small number of loops, it's possible for
	910	# the empty loop to appear to take longer than the real loop
	911	# (e.g. 1 tick versus 0 ticks). This leads to a negative elapsed
	912	# time. In this case, floor it at zero, to stop bizarre results.
	913	print " (warning: too few iterations for a reliable count)\n";
	914	$t->[$_] = 0 for 1..4;
	915	}
	916
	917	# A conservative warning to spot very silly tests.
	918	# Don't assume that your benchmark is ok simply because
	919	# you don't get this warning!
	920	print " (warning: too few iterations for a reliable count)\n"
	921	if $n < $Min_Count
	922	\|\| ($t->real < 1 && $n < 1000)
	923	\|\| $t->cpu_a < $Min_CPU;
	924	$t;
	925	}
	926
	927
	928	$_Usage{timethese} = <<'USAGE';
	929	usage: timethese($count, { Name1 => 'code1', ... }); or
	930	timethese($count, { Name1 => sub { code1 }, ... });
	931	USAGE
	932
	933	sub timethese{
	934	my($n, $alt, $style) = @_;
	935	die usage unless ref $alt eq 'HASH';
	936
	937	my @names = sort keys %$alt;
	938	$style = "" unless defined $style;
	939	print "Benchmark: " unless $style eq 'none';
	940	if ( $n > 0 ) {
	941	croak "non-integer loopcount $n, stopped" if int($n)<$n;
	942	print "timing $n iterations of" unless $style eq 'none';
	943	} else {
	944	print "running" unless $style eq 'none';
	945	}
	946	print " ", join(', ',@names) unless $style eq 'none';
	947	unless ( $n > 0 ) {
	948	my $for = n_to_for( $n );
	949	print ", each" if $n > 1 && $style ne 'none';
	950	print " for at least $for CPU seconds" unless $style eq 'none';
	951	}
	952	print "...\n" unless $style eq 'none';
	953
	954	# we could save the results in an array and produce a summary here
	955	# sum, min, max, avg etc etc
	956	my %results;
	957	foreach my $name (@names) {
	958	$results{$name} = timethis ($n, $alt -> {$name}, $name, $style);
	959	}
	960
	961	return \%results;
	962	}
	963
	964
	965	$_Usage{cmpthese} = <<'USAGE';
	966	usage: cmpthese($count, { Name1 => 'code1', ... }); or
	967	cmpthese($count, { Name1 => sub { code1 }, ... }); or
	968	cmpthese($result, $style);
	969	USAGE
	970
	971	sub cmpthese{
	972	my ($results, $style);
	973
	974	# $count can be a blessed object.
	975	if ( ref $_[0] eq 'HASH' ) {
	976	($results, $style) = @_;
	977	}
	978	else {
	979	my($count, $code) = @_[0,1];
	980	$style = $_[2] if defined $_[2];
	981
	982	die usage unless ref $code eq 'HASH';
	983
	984	$results = timethese($count, $code, ($style \|\| "none"));
	985	}
	986
	987	$style = "" unless defined $style;
	988
	989	# Flatten in to an array of arrays with the name as the first field
	990	my @vals = map{ [ $_, @{$results->{$_}} ] } keys %$results;
	991
	992	for (@vals) {
	993	# recreate the pre-flattened Benchmark object
	994	my $tmp_bm = bless [ @{$_}[1..$#$_] ];
	995	my $elapsed = $tmp_bm->elapsed($style);
	996	# The epsilon fudge here is to prevent div by 0. Since clock
	997	# resolutions are much larger, it's below the noise floor.
	998	my $rate = $_->[6]/(($elapsed)+0.000000000000001);
	999	$_->[7] = $rate;
	1000	}
	1001
	1002	# Sort by rate
	1003	@vals = sort { $a->[7] <=> $b->[7] } @vals;
	1004
	1005	# If more than half of the rates are greater than one...
	1006	my $display_as_rate = @vals ? ($vals[$#vals>>1]->[7] > 1) : 0;
	1007
	1008	my @rows;
	1009	my @col_widths;
	1010
	1011	my @top_row = (
	1012	'',
	1013	$display_as_rate ? 'Rate' : 's/iter',
	1014	map { $_->[0] } @vals
	1015	);
	1016
	1017	push @rows, \@top_row;
	1018	@col_widths = map { length( $_ ) } @top_row;
	1019
	1020	# Build the data rows
	1021	# We leave the last column in even though it never has any data. Perhaps
	1022	# it should go away. Also, perhaps a style for a single column of
	1023	# percentages might be nice.
	1024	for my $row_val ( @vals ) {
	1025	my @row;
	1026
	1027	# Column 0 = test name
	1028	push @row, $row_val->[0];
	1029	$col_widths[0] = length( $row_val->[0] )
	1030	if length( $row_val->[0] ) > $col_widths[0];
	1031
	1032	# Column 1 = performance
	1033	my $row_rate = $row_val->[7];
	1034
	1035	# We assume that we'll never get a 0 rate.
	1036	my $rate = $display_as_rate ? $row_rate : 1 / $row_rate;
	1037
	1038	# Only give a few decimal places before switching to sci. notation,
	1039	# since the results aren't usually that accurate anyway.
	1040	my $format =
	1041	$rate >= 100 ?
	1042	"%0.0f" :
	1043	$rate >= 10 ?
	1044	"%0.1f" :
	1045	$rate >= 1 ?
	1046	"%0.2f" :
	1047	$rate >= 0.1 ?
	1048	"%0.3f" :
	1049	"%0.2e";
	1050
	1051	$format .= "/s"
	1052	if $display_as_rate;
	1053
	1054	my $formatted_rate = sprintf( $format, $rate );
	1055	push @row, $formatted_rate;
	1056	$col_widths[1] = length( $formatted_rate )
	1057	if length( $formatted_rate ) > $col_widths[1];
	1058
	1059	# Columns 2..N = performance ratios
	1060	my $skip_rest = 0;
	1061	for ( my $col_num = 0 ; $col_num < @vals ; ++$col_num ) {
	1062	my $col_val = $vals[$col_num];
	1063	my $out;
	1064	if ( $skip_rest ) {
	1065	$out = '';
	1066	}
	1067	elsif ( $col_val->[0] eq $row_val->[0] ) {
	1068	$out = "--";
	1069	# $skip_rest = 1;
	1070	}
	1071	else {
	1072	my $col_rate = $col_val->[7];
	1073	$out = sprintf( "%.0f%%", 100*$row_rate/$col_rate - 100 );
	1074	}
	1075	push @row, $out;
	1076	$col_widths[$col_num+2] = length( $out )
	1077	if length( $out ) > $col_widths[$col_num+2];
	1078
	1079	# A little weirdness to set the first column width properly
	1080	$col_widths[$col_num+2] = length( $col_val->[0] )
	1081	if length( $col_val->[0] ) > $col_widths[$col_num+2];
	1082	}
	1083	push @rows, \@row;
	1084	}
	1085
	1086	return \@rows if $style eq "none";
	1087
	1088	# Equalize column widths in the chart as much as possible without
	1089	# exceeding 80 characters. This does not use or affect cols 0 or 1.
	1090	my @sorted_width_refs =
	1091	sort { $$a <=> $$b } map { \$_ } @col_widths[2..$#col_widths];
	1092	my $max_width = ${$sorted_width_refs[-1]};
	1093
	1094	my $total = @col_widths - 1 ;
	1095	for ( @col_widths ) { $total += $_ }
	1096
	1097	STRETCHER:
	1098	while ( $total < 80 ) {
	1099	my $min_width = ${$sorted_width_refs[0]};
	1100	last
	1101	if $min_width == $max_width;
	1102	for ( @sorted_width_refs ) {
	1103	last
	1104	if $$_ > $min_width;
	1105	++$$_;
	1106	++$total;
	1107	last STRETCHER
	1108	if $total >= 80;
	1109	}
	1110	}
	1111
	1112	# Dump the output
	1113	my $format = join( ' ', map { "%${_}s" } @col_widths ) . "\n";
	1114	substr( $format, 1, 0 ) = '-';
	1115	for ( @rows ) {
	1116	printf $format, @$_;
	1117	}
	1118
	1119	return \@rows ;
	1120	}
	1121
	1122
	1123	1;