#!/usr/bin/perl
#
# bottleneck - print saturation averages for CPU, memory, disk and network.
#	May quickly highlight a system bottleneck. Solaris 8+.
#
# This Perl program uses the Sun::Solaris::Kstat library to fetch values.
#
# 22-Sep-2005, ver 0.88 (check for new versions, http://www.brendangregg.com)
#
#
# USAGE: bottleneck [-h] | [interval [count]]
#
#	bottleneck		# print a 1 second sample
#	bottleneck -h		# print help
#	bottleneck 5		# print continually, every 5 seconds
#	bottleneck 1 5		# print 5 times, every 1 second
#
# This program prints the saturation values from four other programs on one
# line: loadcpu, loadmem, loaddisk and loadnet. These other programs
# contain the documentation on how these saturation or "load" values are
# calculated and what they represent. A summary is,
#
#	CPU		# threads on the run queue
#	Memory		# scan rate of the page scanner
#	Disk		# operations on the wait queue
#	Network		# errors due to buffer saturation
#
# A load of 1.00 indicates moderate saturation of the resource (usually bad),
# a load of 4.00 would indicate heavy saturation or demand for the resource.
# A load of 0.00 does not indicate idle or unused - rather not saturated.
# See other Solaris commands for levels of usage or utilisation, or for
# further details of saturation.
#
# The first line is the summary since boot.
#
# NOTE: For unusual disks or network cards, check their instance names are
# in this code (a few lines beneath this block comment).
#
#
# SEE ALSO: sysperfstat, loadcpu, loadmem, loaddisk, loadnet
#	http://www.brendangregg.com/k9toolkit.html
#
# COPYRIGHT: Copyright (c) 2004 Brendan Gregg.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# (http://www.gnu.org/copyleft/gpl.html)
#
# Author: Brendan Gregg [Sydney, Australia]
#
# 23-Mar-2004	Brendan Gregg	Created this.
# 19-Mar-2005	   "      "	Added summary since boot line.

use Sun::Solaris::Kstat;
my $Kstat = Sun::Solaris::Kstat->new();

#
# Disk instance names
#
@Disk = qw(cmdk dad sd ssd);

#
# Network card instance names
#
@Network = qw(be bge ce ci dmfe e1000g el eri elxl fa ge hme ipdptp iprb
    lane le nf ppp qe qfe rtls sppp vge);

#
# --- Process command line args ---
#
# Accepted forms: no arguments, "-h" / "--help", "interval", or
# "interval count". Extra arguments are ignored.
#
my ($sleep, $loop) = @ARGV;
$sleep = "" unless defined $sleep;
$loop  = "" unless defined $loop;

# usage() is documented as printing the usage message and exiting.
usage() if $sleep eq "-h" || $sleep eq "--help";

if ($sleep eq "") {
    # No interval given: print a single line and stop.
    $sleep = 1;
    $loop  = 0;
}
else {
    # The interval must be a whole number of seconds >= 1. Anything else
    # would reach sleep() as 0 and turn the main loop into a busy loop.
    usage() unless $sleep =~ /\A\d+\z/ && $sleep > 0;

    if ($loop eq "") {
        # No count given: effectively loop forever.
        $loop = 2**32;
    }
    else {
        # A non-numeric count would silently compare as 0 below.
        usage() unless $loop =~ /\A\d+\z/;
    }
}

my $PAGESIZE = 20;              # max lines per header
my $lines    = $PAGESIZE;       # counter for lines printed
$| = 1;                         # unbuffered output

# Lookup tables keyed by kstat module name; read by fetch_disk/fetch_net.
$Disk{$_}    = 1 foreach (@Disk);
$Network{$_} = 1 foreach (@Network);

#
# --- Main ---
#
my ($runque, $scan, $wait, $error);             # latest counter values
my ($update1, $update2, $update3, $update4);    # latest time values
my $count = 0;                                  # lines printed so far

while (1) {
    if ($lines++ >= $PAGESIZE) {
        # Count the data line printed below as line 1 of this page, so
        # that a header really appears once per $PAGESIZE data lines.
        $lines = 1;
        printf("%8s %6s %6s %6s %6s\n", "Time", "CPU", "Mem", "Disk", "Net");
    }

    #
    # Store old values. On the first pass these are all undefined, so
    # the deltas are taken against zero (the summary since boot).
    #
    my ($oldupdate1, $oldupdate2, $oldupdate3, $oldupdate4) =
        ($update1, $update2, $update3, $update4);
    my ($oldrunque, $oldscan, $oldwait, $olderror) =
        ($runque, $scan, $wait, $error);

    #
    # Get new values
    #
    $Kstat->update();
    ($runque, $update1) = fetch_cpu();
    ($scan,   $update2) = fetch_mem();
    ($wait,   $update3) = fetch_disk();
    ($error,  $update4) = fetch_net();

    #
    # Calculate load averages
    #
    my $cpu  = ratio($runque, $oldrunque, $update1, $oldupdate1);
    my $mem  = ratio($scan,   $oldscan,   $update2, $oldupdate2);
    my $disk = ratio($wait,   $oldwait,   $update3, $oldupdate3);
    my $net  = ratio($error,  $olderror,  $update4, $oldupdate4);

    #
    # Print load averages
    #
    my @Time = localtime();
    printf("%02d:%02d:%02d %6s %6s %6s %6s\n", $Time[2], $Time[1],
        $Time[0], $cpu, $mem, $disk, $net);

    ### Check for end. The post-increment means $loop + 1 lines are
    ### printed in total: the since-boot line plus $loop interval lines.
    last if $count++ == $loop;

    ### Interval
    sleep($sleep);
}

#
# --- Subroutines ---
#

# fetch_cpu - fetch current values for runque and updates.
#             Returns the list (runque, updates) read from the
#             unix:0:sysinfo kstat.
#
sub fetch_cpu {
    my $sysinfo = $Kstat->{unix}->{0}->{sysinfo};
    return ($sysinfo->{runque}, $sysinfo->{updates});
}

# fetch_mem - fetch KStat values for the scanrate. The values used are
#             scan and snaptime.
#             Returns the list (scan, time): scan is the sum of the scan
#             counters over every cpu_stat instance, divided by slowscan;
#             time is the snaptime of the last cpu_stat entry visited
#             (undef if none had a scan value).
#
sub fetch_mem {
    my $scan = 0;
    my $time;

    my $instances = $Kstat->{cpu_stat};
    foreach my $instance (keys(%$instances)) {
        my $names = $instances->{$instance};
        foreach my $name (keys(%$names)) {
            my $stats = $names->{$name};
            next unless defined $stats->{scan};
            $scan += $stats->{scan};
            # use the last snaptime value found,
            $time = $stats->{snaptime};
        }
    }

    #
    # Divide scanrate by slowscan. This gives more sensible load averages,
    # eg a consistent load of 1.00 indicates consistently at slowscan.
    # slowscan is usually 100, so that value is assumed when the kstat is
    # missing or zero rather than dying on a division by zero.
    #
    my $slowscan = $Kstat->{unix}->{0}->{system_pages}->{slowscan};
    $slowscan = 100 unless $slowscan;
    $scan = $scan / $slowscan;

    return ($scan, $time);
}

# fetch_disk - fetch KStat values for the disks. The values used are wlentime
#              and wlastupdate.
#              Returns the list (wait, time): wait is the sum of wlentime
#              over every matching disk kstat, time is the wlastupdate of
#              the last one visited (undef if none was found). Only
#              modules listed in %Disk are examined.
#
sub fetch_disk {
    my $wait = 0;
    my $time;

    foreach my $module (keys(%$Kstat)) {
        ### Check that this is a disk structure,
        next unless $Disk{$module};

        my $instances = $Kstat->{$module};
        foreach my $instance (keys(%$instances)) {
            my $names = $instances->{$instance};
            foreach my $name (keys(%$names)) {
                ### Check that this isn't a slice
                next if $name =~ /,/;

                my $stats = $names->{$name};
                next unless defined $stats->{wlentime};
                $wait += $stats->{wlentime};
                # use the last wlastupdate value found,
                $time = $stats->{wlastupdate};
            }
        }
    }

    return ($wait, $time);
}

# fetch_net - fetch KStat values for the network interfaces.
#             The values used
#             are defer, nocanput, norcvbuf and noxmtbuf.
#             Returns the list (error, time): error is the sum of those
#             four counters over every matching interface kstat, divided
#             by 200; time is the snaptime of the last kstat visited
#             (undef if none was found). Only modules listed in %Network
#             are examined.
#
sub fetch_net {
    my $error = 0;
    my $time;

    foreach my $module (keys(%$Kstat)) {
        ### Check that this is a network interface structure,
        next unless $Network{$module};

        my $instances = $Kstat->{$module};
        foreach my $instance (keys(%$instances)) {
            my $names = $instances->{$instance};
            foreach my $name (keys(%$names)) {
                my $stats = $names->{$name};

                ### Only kstats carrying buffer counters are of interest
                next unless defined $stats->{nocanput}
                         || defined $stats->{norcvbuf};

                # A counter that is absent from this kstat counts as 0.
                foreach my $counter (qw(defer nocanput norcvbuf noxmtbuf)) {
                    $error += $stats->{$counter} || 0;
                }
                # use the last snaptime value found,
                $time = $stats->{snaptime};
            }
        }
    }

    #
    # Divide errors by 200. This gives more sensible load averages,
    # such as 4.00 meaning heavily saturated rather than 800.00.
    # Future versions of this program may use a more elegant technique
    # rather than a factor of 200.
    #
    $error = $error / 200;

    return ($error, $time);
}

# ratio - calculate the ratio of the count delta over time delta;
#         given count and oldcount, time and oldtime. Undefined inputs
#         (no previous sample yet) are treated as 0. Returns the value
#         as a string formatted with two decimal places; "0.00" is
#         returned when the time delta is not positive.
#
sub ratio {
    my ($count, $oldcount, $time, $oldtime) = @_;

    # Keep the working values lexical so that nothing leaks into
    # package globals between calls.
    my $countd = ($count || 0) - ($oldcount || 0);
    my $timed  = ($time  || 0) - ($oldtime  || 0);

    my $ratio = $timed > 0 ? $countd / $timed : 0;

    return sprintf("%.2f", $ratio);
}

# usage - print usage and exit.
#
sub usage {
    print STDERR <