#!/usr/bin/perl -w # # perf2runs Filter Linux "perf script" output to only show single CPU runs. # # USAGE EXAMPLE: # # perf record -F 100 -a -- sleep 60 # perf script | ./perf2runs.pl # # This is a specific tool for a specific problem: where a multi-threaded app # encounters a single thread blocking the world, and running on the same CPU # for several samples. This plucks out these bottlenecks. # # 15-Mar-2016 Brendan Gregg Created this. use strict; # minsamples sets the threshold for detecting a run on the same CPU. It is the # minimum number of same-CPU consecutive samples for identifying a run. When # sampling at 100 Hertz, a minsamples of 4 suggests a 40 ms single-threaded # run, however, because of the random ordering of CPU samples in the profile, # there is an error margin of 2 samples (as the "stuck" CPU may randomly have # printed last before the run, and first after the run, appearing to be part # of the run; this program could be improved to detect this and remove this # error margin). So 4 means at least 20 ms, usually 40ms. 3 would mean at # least 10 ms, usually 30ms. my $minsamples = 4; my $run = 0; my $line; my $next; my @stack; my @lastcpu; my ($cpu, $ts, $event); for (my $i = 0; $i < $minsamples; $i++) { $lastcpu[$i] = -1; } while (1) { # skip comments undef $next; $line = <>; haveline: last unless defined $line; next if $line =~ /^#/; # # Parsing # # ip only examples: # # java 52025 [026] 99161.926202: cycles: # java 14341 [016] 252732.474759: cycles: 7f36571947c0 nmethod::is_nmethod() const (/... # java 14514 [022] 28191.353083: cpu-clock: 7f92b4fdb7d4 Ljava_util_List$size$0;::call (/tmp/perf-11936.map) # # stack examples (-g): # # swapper 0 [021] 28648.467059: cpu-clock: # ffffffff810013aa xen_hypercall_sched_op ([kernel.kallsyms]) # ffffffff8101cb2f default_idle ([kernel.kallsyms]) # ffffffff8101d406 arch_cpu_idle ([kernel.kallsyms]) # ffffffff810bf475 cpu_startup_entry ([kernel.kallsyms]) # ffffffff81010228 cpu_bringup_and_idle ([kernel.kallsyms]) # # java 14375 [022] 28648.467079: cpu-clock: # 7f92bdd98965 Ljava/io/OutputStream;::write (/tmp/perf-11936.map) # 7f8808cae7a8 [unknown] ([unknown]) # # swapper 0 [005] 5076.836336: cpu-clock: # ffffffff81051586 native_safe_halt ([kernel.kallsyms]) # ffffffff8101db4f default_idle ([kernel.kallsyms]) # ffffffff8101e466 arch_cpu_idle ([kernel.kallsyms]) # ffffffff810c2b31 cpu_startup_entry ([kernel.kallsyms]) # ffffffff810427cd start_secondary ([kernel.kallsyms]) # if ($line =~ / \d+ \[(\d+)\] +(\S+): (\S+):/) { ($cpu, $ts, $event) = ($1, $2, $3); $run = 0; # attempt to pull in stack: @stack = (); while (1) { $next = <>; goto doexit unless defined $next; if ($next =~ / \d+ \[(\d+)\] +(\S+): (\S+):/) { # not a stack goto process; } push(@stack, $next); } process: # skip idle (add extra idle functions here): if (scalar @stack > 0) { goto done if ($line =~ /swapper/) and grep(/(cpu_idle|cpu_bringup_and_idle|native_safe_halt|xen_hypercall_sched_op|xen_hypercall_vcpu_op)/, @stack); } else { goto done if $line =~ /swapper .* (cpu_idle|cpu_bringup_and_idle|native_safe_halt|xen_hypercall_sched_op|xen_hypercall_vcpu_op)/; } # # Run Detection # # Maintain an array that contains the CPU that the last few # samples were running on (@lastcpu). I then check the current # $cpu (which at this point is a non-idle running task), vs # the array, and if the CPU is identical across all samples # then we've detected a run on one CPU while others were idle. # $run = 1; for (my $i = 0; $i < $minsamples; $i++) { $run = 0 if $cpu != $lastcpu[$i]; } unshift(@lastcpu, $cpu); pop(@lastcpu); } if (1) { $ts =~ tr/\./ /; print "$ts\n"; } elsif ($run) { print $line; print @stack if scalar @stack > 0; } done: if (defined $next) { $line = $next; goto haveline; } doexit: }