Capstone is a lightweight, multi-architecture disassembly framework designed for reverse engineering and exploit development. It supports x86, x86-64, ARM, ARM64, MIPS, PowerPC, SPARC, and other architectures. Written in C with bindings for Python, Java, Go, and more, Capstone is the foundation for many reverse engineering tools like Radare2 and IDA Pro plugins.
# Via pip (easiest method)
pip install capstone
# Verify installation
python3 -c "import capstone; print(capstone.__version__)"
# From package manager (apt-based distributions)
sudo apt-get install libcapstone-dev
# Or build from source
git clone https://github.com/capstone-engine/capstone.git
cd capstone
./make.sh
sudo ./make.sh install
# Via Homebrew
brew install capstone
# Or build from source with plain make
git clone https://github.com/capstone-engine/capstone.git
cd capstone
make
sudo make install
# Python bindings (run from inside the capstone source tree)
cd bindings/python
python3 setup.py install
from capstone import *

# Check version.
# The bindings expose no CS_VERSION name; cs_version() queries the loaded
# native library and returns (major, minor, combined).
major, minor, _combined = cs_version()
print(f"Capstone version: {major}.{minor}")
from capstone import *

# Machine code for: mov rax, 0x123456; ret
code = bytes.fromhex("48c7c056341200c3")
load_address = 0x1000

# One engine instance per architecture/mode pair.
md = Cs(CS_ARCH_X86, CS_MODE_64)

# disasm() lazily yields one decoded instruction at a time.
for insn in md.disasm(code, load_address):
    line = f"0x{insn.address:x}: {insn.mnemonic} {insn.op_str}"
    print(line)

# Expected output:
# 0x1000: mov rax, 0x123456
# 0x1007: ret
from capstone import *

# ARM Thumb code: adds r0, #1 (a single 16-bit instruction, little-endian)
code = b"\x01\x30"
# The bytes are Thumb-encoded, so the engine must be opened in Thumb mode.
# In CS_MODE_ARM every instruction is 4 bytes wide and this 2-byte input
# would produce no output at all.
md = Cs(CS_ARCH_ARM, CS_MODE_THUMB)
md.detail = True
for instr in md.disasm(code, 0x0):
    print(f"{instr.address:04x}: {instr.mnemonic} {instr.op_str}")
from capstone import *

# One big-endian MIPS32 word: nop (encoded as sll $zero, $zero, 0)
code = bytes(4)
base = 0x400000

# Mode flags are bit masks; combine word size and endianness.
md = Cs(CS_ARCH_MIPS, CS_MODE_MIPS32 + CS_MODE_BIG_ENDIAN)
for insn in md.disasm(code, base):
    print(f"0x{insn.address:x}: {insn.mnemonic} {insn.op_str}")
| Architecture | Constants | Modes |
|---|---|---|
| x86 | CS_ARCH_X86 | CS_MODE_32, CS_MODE_64 |
| ARM | CS_ARCH_ARM | CS_MODE_ARM, CS_MODE_THUMB |
| ARM64 | CS_ARCH_ARM64 | CS_MODE_ARM |
| MIPS | CS_ARCH_MIPS | CS_MODE_MIPS32, CS_MODE_MIPS64 |
| PowerPC | CS_ARCH_PPC | CS_MODE_32, CS_MODE_64 |
| SPARC | CS_ARCH_SPARC | CS_MODE_BIG_ENDIAN, CS_MODE_V9 |
from capstone import *

# x86-32
md_x86_32 = Cs(CS_ARCH_X86, CS_MODE_32)
# x86-64
md_x86_64 = Cs(CS_ARCH_X86, CS_MODE_64)
# ARM 32-bit
md_arm = Cs(CS_ARCH_ARM, CS_MODE_ARM)
# ARM Thumb
md_thumb = Cs(CS_ARCH_ARM, CS_MODE_THUMB)
# ARM 64-bit: there is no CS_MODE_ARM64; the architecture constant already
# selects AArch64 and the mode is the plain ARM mode.
md_arm64 = Cs(CS_ARCH_ARM64, CS_MODE_ARM)
# MIPS 32-bit
md_mips = Cs(CS_ARCH_MIPS, CS_MODE_MIPS32)
# PowerPC 64-bit: there is no CS_MODE_PPC64; use the generic 64-bit mode.
# Big-endian is stated explicitly; drop that flag for little-endian targets.
md_ppc = Cs(CS_ARCH_PPC, CS_MODE_64 | CS_MODE_BIG_ENDIAN)
from capstone import *
# The X86_OP_* / X86_REG_* constants live in the architecture submodule and
# are not exported by the top-level package.
from capstone.x86 import *

code = b"\x48\x89\xc1"  # mov rcx, rax
md = Cs(CS_ARCH_X86, CS_MODE_64)
md.detail = True  # Operand information is only populated in detail mode
for instr in md.disasm(code, 0x1000):
    print(f"Instruction: {instr.mnemonic} {instr.op_str}")
    # Walk the decoded operands and dispatch on their kind
    for op in instr.operands:
        if op.type == X86_OP_REG:
            print(f" Register: {instr.reg_name(op.reg)}")
        elif op.type == X86_OP_IMM:
            print(f" Immediate: 0x{op.imm:x}")
        elif op.type == X86_OP_MEM:
            # base/index are register IDs; pass them to instr.reg_name()
            # to obtain readable names
            print(f" Memory: [{op.mem.base} + {op.mem.index} * {op.mem.scale} + {op.mem.disp}]")
| Type | Constant | Description | Example |
|---|---|---|---|
| Register | X86_OP_REG | CPU register | rax, rbx |
| Immediate | X86_OP_IMM | Constant value | 0x1234 |
| Memory | X86_OP_MEM | Memory reference | [rax + rbx*2 + 0x100] |
| FP | X86_OP_FP | Floating point | st(0) |
from capstone import *

code = b"\x90"  # nop
md = Cs(CS_ARCH_X86, CS_MODE_64)
md.detail = True  # groups / regs_read / regs_write need detail mode

for insn in md.disasm(code, 0x0):
    # Basic attributes, then classification groups, then the implicit
    # registers the instruction reads and writes.
    report = (
        ("ID", insn.id),
        ("Mnemonic", insn.mnemonic),
        ("Op String", insn.op_str),
        ("Bytes", insn.bytes.hex()),
        ("Address", f"0x{insn.address:x}"),
        ("Size", insn.size),
        ("Groups", insn.groups),
        ("Regs read", insn.regs_read),
        ("Regs write", insn.regs_write),
    )
    for label, value in report:
        print(f"{label}: {value}")
from capstone import *

code = b"\x48\x89\xc1"  # mov rcx, rax
start = 0x0

# Intel syntax is what a fresh x86 engine produces.
md_intel = Cs(CS_ARCH_X86, CS_MODE_64)
for insn in md_intel.disasm(code, start):
    rendered = f"{insn.mnemonic} {insn.op_str}"
    print(f"Intel: {rendered}")
# Output: Intel: mov rcx, rax

# AT&T syntax is selected through the engine's syntax option.
md_att = Cs(CS_ARCH_X86, CS_MODE_64)
md_att.syntax = CS_OPT_SYNTAX_ATT
for insn in md_att.disasm(code, start):
    rendered = f"{insn.mnemonic} {insn.op_str}"
    print(f"AT&T: {rendered}")
# Output: AT&T: movq %rax, %rcx
| Option | Effect |
|---|---|
| CS_OPT_SYNTAX_INTEL | Intel syntax (default) |
| CS_OPT_SYNTAX_ATT | AT&T/Unix syntax |
| CS_OPT_SYNTAX_NASM | NASM assembly syntax |
| CS_OPT_SYNTAX_MASM | MASM assembly syntax |
from capstone import *

code = b"\x55\x48\x89\xe5\x48\x83\xec\x10"  # Standard x86-64 prologue
md = Cs(CS_ARCH_X86, CS_MODE_64)
limit = 10

# Number the instructions from 1 and stop once the limit has been printed.
for count, insn in enumerate(md.disasm(code, 0x400000), start=1):
    raw = insn.bytes.hex()
    print(f"{count}: 0x{insn.address:x} | {raw:20s} | {insn.mnemonic:8s} {insn.op_str}")
    if count >= limit:
        break
from capstone import *
from capstone.x86 import X86_OP_MEM

code = b"\x75\x05\x48\x89\xc0\xc3"  # jne +5; mov rax, rax; ret
md = Cs(CS_ARCH_X86, CS_MODE_64)
md.detail = True  # Groups and operand access flags need detail mode
for instr in md.disasm(code, 0x1000):
    # Check if instruction is a jump
    if instr.group(CS_GRP_JUMP):
        print(f"Branch at 0x{instr.address:x}: {instr.mnemonic} {instr.op_str}")
    # Check if instruction modifies memory. Capstone has no generic
    # "writes memory" group, so look for a memory operand whose access
    # flags include CS_AC_WRITE.
    writes_memory = any(
        op.type == X86_OP_MEM and op.access & CS_AC_WRITE
        for op in instr.operands
    )
    if writes_memory:
        print(f"Memory write at 0x{instr.address:x}: {instr.mnemonic}")
from capstone import *
# X86_OP_IMM is defined in the architecture submodule, not the top level.
from capstone.x86 import X86_OP_IMM

code = b"\x75\x05"  # jne +5
md = Cs(CS_ARCH_X86, CS_MODE_64)
md.detail = True
for instr in md.disasm(code, 0x1000):
    # Only direct jumps carry an immediate operand; guard against
    # instructions that decode without operands.
    if instr.group(CS_GRP_JUMP) and instr.operands:
        op = instr.operands[0]
        if op.type == X86_OP_IMM:
            # For relative branches Capstone already resolves the
            # immediate to the absolute destination address.
            target = op.imm
            print(f"Jump from 0x{instr.address:x} to 0x{target:x}")
from capstone import *

# Binary with invalid instructions mixed with valid code
code = b"\x90\x90\xFF\xFF\xFF\xFF\x90\x90"  # nops, invalid bytes, nops
md = Cs(CS_ARCH_X86, CS_MODE_64)
# Skip undecodable data instead of stopping at the first bad byte.
# With no callback registered, the engine's default skip handling is used.
# Assigning to `md.skipdata_cb` is not a Capstone option and has no effect,
# so that line is dropped.
md.skipdata = True
count = 0
for instr in md.disasm(code, 0x0):
    print(f"0x{instr.address:x}: {instr.mnemonic} {instr.op_str}")
    count += 1
    if count > 10:
        break
from capstone import *


def skipdata_callback(buffer, size, offset, userdata):
    """Custom handler for unrecognized bytes.

    Capstone calls this with the input buffer, its size, the offset of the
    byte that failed to decode, and the user data given at registration.
    The return value is the number of bytes to skip.
    """
    bad_byte = bytes(buffer[offset:offset + 1])
    print(f"Skipping invalid data at offset 0x{offset:x}: {bad_byte.hex()}")
    return 1  # Skip 1 byte


code = b"\x90\xFF\x90"
md = Cs(CS_ARCH_X86, CS_MODE_64)
md.skipdata = True
# The callback is registered through `skipdata_callback`; assigning to
# `md.skipdata_cb` only creates an unused attribute and registers nothing.
md.skipdata_callback = skipdata_callback
for instr in md.disasm(code, 0x0):
    print(f"0x{instr.address:x}: {instr.mnemonic}")
from capstone import *
# X86_REG_* constants come from the architecture submodule.
from capstone.x86 import *

md = Cs(CS_ARCH_X86, CS_MODE_64)
# Translate a register ID into its name
reg_rax = X86_REG_RAX
reg_name = md.reg_name(reg_rax)
print(f"Register ID {reg_rax} = {reg_name}")
# Print the names of a few x86-64 general-purpose registers
x86_regs = [X86_REG_RAX, X86_REG_RBX, X86_REG_RCX, X86_REG_RDX]
for reg_id in x86_regs:
    print(f" {md.reg_name(reg_id)}")
| Architecture | Examples |
|---|---|
| x86 | X86_REG_RAX, X86_REG_RBX, X86_REG_RCX |
| ARM | ARM_REG_R0, ARM_REG_R1, ARM_REG_SP |
| ARM64 | ARM64_REG_X0, ARM64_REG_X1, ARM64_REG_SP |
| MIPS | MIPS_REG_ZERO, MIPS_REG_AT, MIPS_REG_V0 |
from capstone import *


def disassemble_file(filename, arch, mode, address=0):
    """Print a disassembly listing of every byte in *filename*.

    The file is read as raw machine code from its first byte; *address* is
    the address assigned to that first byte. Nothing is returned.
    """
    with open(filename, 'rb') as handle:
        raw = handle.read()

    engine = Cs(arch, mode)
    engine.detail = True
    for insn in engine.disasm(raw, address):
        encoded = insn.bytes.hex()
        print(f"0x{insn.address:08x}: {encoded:20s} {insn.mnemonic:8s} {insn.op_str}")


# Example: feed the raw contents of a binary to the disassembler
disassemble_file('/bin/ls', CS_ARCH_X86, CS_MODE_64, address=0x400000)
from capstone import *
import struct


def get_code_from_elf(filename):
    """Extract the raw bytes of the .text section from an ELF file.

    Returns the section contents as bytes, or None when the file does not
    start with the ELF magic or has no .text section.
    """
    with open(filename, 'rb') as f:
        # Only the 4-byte magic is needed for the sanity check; do not read
        # the whole file into memory for it.
        if f.read(4) != b'\x7fELF':
            print("Not an ELF file")
            return None
        f.seek(0)
        # Use elftools library for proper parsing (imported lazily so the
        # magic check above works without it installed)
        from elftools.elf.elffile import ELFFile
        text = ELFFile(f).get_section_by_name('.text')
        # data() must be called while the file is still open.
        return text.data() if text is not None else None


code = get_code_from_elf('/bin/ls')
if code:
    md = Cs(CS_ARCH_X86, CS_MODE_64)
    # 0x400000 is an assumed load address, not the section's real address
    for instr in md.disasm(code, 0x400000):
        print(f"0x{instr.address:x}: {instr.mnemonic} {instr.op_str}")
from capstone import *
from capstone.x86 import X86_OP_MEM

# Generic, architecture-independent instruction groups and their labels.
GROUP_LABELS = (
    (CS_GRP_JUMP, "JUMP"),
    (CS_GRP_CALL, "CALL"),
    (CS_GRP_RET, "RET"),
    (CS_GRP_INT, "INT"),
    (CS_GRP_IRET, "IRET"),
)

code = b"\x75\x05\x48\x89\xc0\xc3"  # jne +5; mov rax, rax; ret
md = Cs(CS_ARCH_X86, CS_MODE_64)
md.detail = True
for instr in md.disasm(code, 0x0):
    groups = [label for group_id, label in GROUP_LABELS if instr.group(group_id)]
    # Capstone defines no CS_GRP_WRITE_MEM / CS_GRP_READ_MEM groups; memory
    # access is derived from the access flags of memory operands instead.
    mem_access = 0
    for op in instr.operands:
        if op.type == X86_OP_MEM:
            mem_access |= op.access
    if mem_access & CS_AC_WRITE:
        groups.append("WRITE_MEM")
    if mem_access & CS_AC_READ:
        groups.append("READ_MEM")
    group_str = ", ".join(groups) if groups else "NONE"
    print(f"0x{instr.address:x}: {instr.mnemonic:8s} | Groups: {group_str}")
from capstone import *
# X86_OP_MEM is defined in the architecture submodule, not the top level.
from capstone.x86 import X86_OP_MEM

code = b"\x48\x8b\x44\xc3\x10"  # mov rax, [rbx + rax*8 + 0x10]
md = Cs(CS_ARCH_X86, CS_MODE_64)
md.detail = True  # Operand details are only decoded in detail mode
for instr in md.disasm(code, 0x0):
    print(f"{instr.mnemonic} {instr.op_str}")
    for op in instr.operands:
        if op.type == X86_OP_MEM:
            # A memory operand is base + index * scale + displacement;
            # base and index are register IDs.
            print(" Memory operand:")
            print(f" Base: {instr.reg_name(op.mem.base)}")
            print(f" Index: {instr.reg_name(op.mem.index)}")
            print(f" Scale: {op.mem.scale}")
            print(f" Disp: 0x{op.mem.disp:x}")
# Radare2 uses Capstone as its disassembly engine
radare2 /bin/ls
# Inside radare2:
> aaa # Analyze the binary first so functions such as main are known
> pdf @main # Print function disassembly
> pd 20 # Print 20 instructions
> afl # List all functions (pa assembles an instruction instead)
import gdb
from capstone import *


class DisassembleCommand(gdb.Command):
    """GDB command ``disasm [ADDRESS]``: disassemble inferior memory with Capstone.

    With no argument the command reads from DEFAULT_ADDRESS; otherwise the
    argument is evaluated by GDB as an expression (e.g. ``$pc``, ``main``).
    """

    DEFAULT_ADDRESS = 0x400000
    READ_SIZE = 100  # Number of bytes fetched from the inferior

    def __init__(self):
        super().__init__("disasm", gdb.COMMAND_USER)
        self.md = Cs(CS_ARCH_X86, CS_MODE_64)

    def invoke(self, arg, from_tty):
        """Read READ_SIZE bytes at the requested address and print the listing."""
        if arg and arg.strip():
            address = int(gdb.parse_and_eval(arg))
        else:
            address = self.DEFAULT_ADDRESS
        memory = gdb.selected_inferior().read_memory(address, self.READ_SIZE)
        # read_memory returns a buffer object; hand Capstone plain bytes.
        code = bytes(memory)
        for instr in self.md.disasm(code, address):
            print(f"0x{instr.address:x}: {instr.mnemonic} {instr.op_str}")


# Register command
DisassembleCommand()
from capstone import *

code = b"\x90" * 1000
md = Cs(CS_ARCH_X86, CS_MODE_64)
md.detail = False  # Skipping detail decoding is faster but yields less info

# Fast pass: report only instructions at addresses that are multiples of 100.
for insn in md.disasm(code, 0x0):
    if insn.address % 100:
        continue
    print(f"0x{insn.address:x}: {insn.mnemonic}")
from capstone import *

md = Cs(CS_ARCH_X86, CS_MODE_64)
# Turn on detailed decoding
md.detail = True

# Output formats to demonstrate, as (syntax option, heading) pairs
options = [
    (CS_OPT_SYNTAX_INTEL, "Intel Syntax"),
    (CS_OPT_SYNTAX_ATT, "AT&T Syntax"),
]
code = b"\x48\x89\xc1"

for syntax_option, heading in options:
    # The same engine is reused; only its syntax option changes.
    md.syntax = syntax_option
    print(f"\n{heading}:")
    for insn in md.disasm(code, 0x0):
        text = f"{insn.mnemonic} {insn.op_str}"
        print(f" {text}")
from capstone import *

code = b"\xFF\xFF\xFF\xFF"  # Invalid x86 bytes
md = Cs(CS_ARCH_X86, CS_MODE_64)
# Undecodable bytes do not raise: disasm() simply stops yielding. Count the
# results so an empty listing is reported instead of passing silently.
decoded = 0
try:
    for instr in md.disasm(code, 0x0):
        print(f"0x{instr.address:x}: {instr.mnemonic}")
        decoded += 1
except CsError as e:
    # CsError is what the bindings raise for engine-level failures.
    print(f"Disassembly error: {e}")
else:
    if decoded == 0:
        print("No instructions decoded")
# With skipdata enabled
md.skipdata = True
for instr in md.disasm(code, 0x0):
    print(f"0x{instr.address:x}: {instr.mnemonic}")
from capstone import *
import time

code = b"\x90" * 10000  # 10,000 nops
md = Cs(CS_ARCH_X86, CS_MODE_64)
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump when the wall clock is adjusted.

# Method 1: Iterate lazily (constant memory)
start = time.perf_counter()
for instr in md.disasm(code, 0x0):
    pass
iterate_time = time.perf_counter() - start
# Method 2: Materialize every instruction at once (more memory). Both
# methods drive the same generator, so measure rather than assume a winner.
start = time.perf_counter()
instructions = list(md.disasm(code, 0x0))
disasm_time = time.perf_counter() - start
print(f"Iterate: {iterate_time:.4f}s, Disasm all: {disasm_time:.4f}s")
from capstone import *


def find_function_prologues(code, address=0x400000):
    """Find x86-64 function prologues of the form ``push rbp; mov rbp, rsp``.

    *code* is raw machine code and *address* the address of its first byte.
    Returns the list of addresses of the ``push rbp`` instructions. The scan
    is a linear sweep, so it stops at the first undecodable byte.
    """
    md = Cs(CS_ARCH_X86, CS_MODE_64)
    functions = []
    prev_instr = None
    for instr in md.disasm(code, address):
        # Compare the operands as well as the mnemonics: any push followed
        # by any mov would otherwise be reported as a prologue.
        if (prev_instr is not None
                and prev_instr.mnemonic == "push"
                and prev_instr.op_str == "rbp"
                and instr.mnemonic == "mov"
                and instr.op_str == "rbp, rsp"):
            functions.append(prev_instr.address)
        prev_instr = instr
    return functions


# Usage
code = b"\x55\x48\x89\xe5\x48\x83\xec\x10"  # push rbp; mov rbp, rsp; sub rsp, 0x10
prologues = find_function_prologues(code)
for addr in prologues:
    print(f"Function at 0x{addr:x}")
from capstone import *
# X86_OP_IMM is defined in the architecture submodule, not the top level.
from capstone.x86 import X86_OP_IMM


def extract_immediates(code):
    """Extract immediate operand values from x86-64 machine code.

    Returns a list of ``(hex_string, instruction_address)`` tuples, one per
    immediate operand, with addresses counted from 0.
    """
    md = Cs(CS_ARCH_X86, CS_MODE_64)
    md.detail = True  # Operands are only available in detail mode
    immediates = []
    for instr in md.disasm(code, 0x0):
        for op in instr.operands:
            if op.type == X86_OP_IMM:
                immediates.append(("0x{:x}".format(op.imm), instr.address))
    return immediates
from capstone import *

code = bytes.fromhex("0100a0e3")  # mov r0, #1 (executed always)
md = Cs(CS_ARCH_ARM, CS_MODE_ARM)
md.detail = True  # The condition code is part of the detail information

for insn in md.disasm(code, 0x1000):
    condition = insn.cc
    print(f"Condition: {condition}")
    # The value is a numeric condition code: AL = always, EQ = equal,
    # NE = not equal, etc.
from capstone import *

code = b"\x00\x00\x00\x00\x08\x00\x00\x00"  # nop; j 0
# The bytes above are big-endian words (0x08000000 is `j 0`), so the engine
# must be opened big-endian; in the default little-endian mode the second
# word would decode as a different instruction.
md = Cs(CS_ARCH_MIPS, CS_MODE_MIPS32 | CS_MODE_BIG_ENDIAN)
md.detail = True  # Group membership needs detail mode
for instr in md.disasm(code, 0x400000):
    # Check for delay slot requirement
    if instr.group(CS_GRP_JUMP):
        print("Jump instruction requires delay slot")
| Issue | Solution |
|---|---|
| Invalid architecture | Verify CS_ARCH_* constant matches binary |
| No output from disasm | Check code is not empty, address is valid |
| Wrong syntax | Set md.syntax = CS_OPT_SYNTAX_ATT for AT&T |
| Crash on bad bytes | Enable md.skipdata = True |
| Memory operands incorrect | Enable md.detail = True |