Skip to content

perf Commands

perf (perf_events) is the official Linux profiler, providing access to hardware performance counters, software events, tracepoints, and dynamic probes. It is the foundation of Linux performance analysis.

Installation

Linux/Ubuntu

# Ubuntu/Debian — install matching kernel version
sudo apt install linux-tools-$(uname -r) linux-tools-common

# Fedora/RHEL
sudo dnf install perf

# Verify installation
perf version

# Allow non-root profiling (temporary)
echo 0 | sudo tee /proc/sys/kernel/perf_event_paranoid
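# Levels are cumulative for unprivileged users: -1 = no restrictions; 0 = no raw/ftrace tracepoint access; 1 = additionally no CPU-wide (system-wide) events; 2 = additionally no kernel profiling, user-space only (kernel default)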

perf stat — Counting Events

# Basic CPU counters for a command
perf stat ls -la

# Detailed counters (repeat -d, up to three times, for progressively more detail)
perf stat -d -d -d ./my_program

# Specific events
perf stat -e cycles,instructions,cache-misses,cache-references ./my_program

# Per-CPU statistics (-A disables aggregation across CPUs; use --per-core to aggregate by physical core)
perf stat -a -A sleep 5

# System-wide counting for 10 seconds
perf stat -a sleep 10

# Repeat measurement 5 times for statistical accuracy
perf stat -r 5 ./my_program

# Count for a running process
perf stat -p 1234 sleep 10

# CSV output for scripting
perf stat -x, -e cycles,instructions ./my_program 2>&1

# Group events to ensure co-scheduling
perf stat -e '{cycles,instructions}' ./my_program

perf record — Sampling Profiles

# Record CPU profile at default rate (4000 Hz)
perf record ./my_program

# Record at custom frequency
perf record -F 99 ./my_program

# Record system-wide for 30 seconds
perf record -a -g sleep 30

# Record with call graph (dwarf for user-space accuracy)
perf record -g --call-graph dwarf ./my_program

# Record with call graph (fp — requires frame pointers)
perf record -g --call-graph fp ./my_program

# Record with call graph (lbr — Intel Last Branch Record)
perf record -g --call-graph lbr ./my_program

# Record specific events
perf record -e cache-misses -c 10000 ./my_program

# Record a running process for 60 seconds
perf record -p 1234 -g sleep 60

# Record with per-thread granularity
perf record -s ./my_program

# Write to a specific output file
perf record -o my_profile.data ./my_program

# Record with timestamp for correlation
perf record -T -g ./my_program

perf report — Analyzing Profiles

# Interactive TUI report
perf report

# Report from a specific file
perf report -i my_profile.data

# Report sorted by overhead
perf report --sort=overhead

# Show caller/callee (call chain)
perf report -g caller

# Flat profile (no call graph)
perf report -g none

# Interleave source code with assembly in annotations (on by default; use --sort=srcline for per-source-line attribution)
perf report --source

# Stdio output (non-interactive)
perf report --stdio

# Filter to specific DSO (shared library)
perf report -d libc-2.31.so

# Show per-symbol details
perf report --stdio --sort=sym

# Export for flame graph generation
perf script > out.stacks

perf top — Live Profiling

# Live system-wide CPU profiling
sudo perf top

# Profile a specific process
sudo perf top -p 1234

# Show call graph
sudo perf top -g

# Profile specific events
sudo perf top -e cache-misses

# Profile specific CPU
sudo perf top -C 0

# Change sampling frequency
sudo perf top -F 999

# Hide kernel symbols, showing user-space only (-K); use -U instead to hide user symbols and show kernel only
sudo perf top -K

perf annotate — Source-Level Analysis

# Annotate hot function from a recorded profile
perf annotate

# Annotate a specific symbol
perf annotate my_hot_function

# Print matching source lines alongside the annotation (requires debug info; may be slow)
perf annotate -l my_hot_function

# Stdio output
perf annotate --stdio my_hot_function

# From a specific data file
perf annotate -i my_profile.data my_hot_function

perf probe — Dynamic Tracing

# Add a probe on a kernel function
sudo perf probe --add tcp_sendmsg

# Add a probe with function arguments
sudo perf probe --add 'tcp_sendmsg size'

# Add a return probe
sudo perf probe --add 'tcp_sendmsg%return $retval'

# List available variables at a probe point
sudo perf probe -V tcp_sendmsg

# List defined probes
sudo perf probe --list

# Remove a probe
sudo perf probe --del tcp_sendmsg

# Record with the dynamic probe
sudo perf record -e probe:tcp_sendmsg -a sleep 10

# Add a user-space probe
perf probe -x /usr/bin/python3 --add 'main'

perf sched — Scheduler Analysis

# Record scheduler events
sudo perf sched record sleep 10

# Show scheduling latency summary
sudo perf sched latency

# Show per-CPU scheduling timeline
sudo perf sched map

# Replay scheduling events
sudo perf sched replay

# Show scheduling statistics
sudo perf sched timehist

# Show wakeup chains
sudo perf sched timehist -w

perf mem — Memory Access Profiling

# Record memory access events (requires hardware support)
sudo perf mem record ./my_program

# Report memory access profile
sudo perf mem report

# Sort samples by memory access type (data source, e.g. L1/LLC/RAM hit) and symbol
sudo perf mem report --sort=mem,sym

perf lock — Lock Contention Analysis

# Record lock events
sudo perf lock record sleep 10

# Report lock contention
sudo perf lock report

# Show lock instance metadata (threads or lock addresses)
sudo perf lock info

Hardware Counters

# List available events
perf list

# List hardware events
perf list hw

# List software events
perf list sw

# List cache events
perf list cache

# Common hardware counter combinations
perf stat -e cycles,instructions,branches,branch-misses ./my_program

# Cache analysis
perf stat -e L1-dcache-loads,L1-dcache-load-misses,LLC-loads,LLC-load-misses ./my_program

# TLB analysis
perf stat -e dTLB-loads,dTLB-load-misses,iTLB-loads,iTLB-load-misses ./my_program

# Branch prediction analysis
perf stat -e branches,branch-misses ./my_program

perf script — Raw Event Output

# Dump raw events for post-processing
perf script > out.perf

# Output with specific fields
perf script -F comm,pid,tid,time,event,ip,sym,dso

# Generate folded stacks for flame graphs
perf script | stackcollapse-perf.pl > out.folded

# Filter by process name
perf script -c my_program

Quick Reference

| Command | Purpose |
| --- | --- |
| perf stat | Count hardware/software events |
| perf record | Sample and record profile data |
| perf report | Analyze recorded profile |
| perf top | Live system-wide profiling |
| perf annotate | Source/assembly annotation |
| perf probe | Dynamic tracing probes |
| perf sched | Scheduler analysis |
| perf mem | Memory access profiling |
| perf lock | Lock contention analysis |
| perf script | Raw event dump for scripting |
| perf list | List available events |