Capstone Engine
Capstone è un motore di disassemblaggio leggero e multipiattaforma che fornisce un’API pulita per il disassemblaggio del codice macchina. Supporta x86, ARM, AArch64, MIPS, PowerPC, SPARC, SystemZ e altro, con binding per Python, C, Java, Go e altri linguaggi.
Installazione
# Python binding (installs the `capstone` package with pip)
pip install capstone
# From source (C library + Python): clone, configure with CMake, build, install
git clone https://github.com/capstone-engine/capstone.git
cd capstone
# Out-of-tree build: keep generated files in a separate build/ directory
mkdir build && cd build
cmake ..
# Parallel build, one job per processing unit reported by nproc
make -j$(nproc)
sudo make install
# Debian/Ubuntu: distribution packages for the C library and the Python 3 binding
sudo apt-get install libcapstone-dev python3-capstone
# macOS: library from Homebrew, Python binding from pip
brew install capstone
pip install capstone
API Python: disassemblaggio di base
Disassemblaggio x86-64
from capstone import *
# Raw x86-64 machine code to decode
CODE = b"\x55\x48\x89\xe5\x48\x83\xec\x10\x89\x7d\xfc\xb8\x00\x00\x00\x00\xc9\xc3"
# Disassembler instance configured for 64-bit x86
md = Cs(CS_ARCH_X86, CS_MODE_64)
# Decode as if the bytes were loaded at 0x1000; print one line per instruction
for ins in md.disasm(CODE, 0x1000):
    addr, mnem, ops = ins.address, ins.mnemonic, ins.op_str
    print(f"0x{addr:x}:\t{mnem}\t{ops}")
x86 32-bit
from capstone import *
# Raw 32-bit x86 machine code to decode
CODE = b"\x55\x89\xe5\x83\xec\x08\xe8\x00\x00\x00\x00\x5d\xc9\xc3"
md = Cs(CS_ARCH_X86, CS_MODE_32)
# Addresses in the listing start at the base passed here (0x8048000)
listing = md.disasm(CODE, 0x8048000)
for ins in listing:
    addr, mnem, ops = ins.address, ins.mnemonic, ins.op_str
    print(f"0x{addr:x}:\t{mnem}\t{ops}")
Supporto architetture
from capstone import *
# Architecture and mode constants
# CS_ARCH_X86 - x86/x86-64
# CS_ARCH_ARM - ARM (32-bit)
# CS_ARCH_ARM64 - AArch64
# CS_ARCH_MIPS - MIPS
# CS_ARCH_PPC - PowerPC
# CS_ARCH_SPARC - SPARC
# CS_ARCH_SYSZ - SystemZ
# CS_ARCH_XCORE - XCore
# CS_ARCH_RISCV - RISC-V
# Mode constants
# CS_MODE_32 - 32-bit
# CS_MODE_64 - 64-bit
# CS_MODE_ARM - ARM mode
# CS_MODE_THUMB - Thumb mode
# CS_MODE_MIPS32 - MIPS 32-bit
# CS_MODE_MIPS64 - MIPS 64-bit
# CS_MODE_RISCV64 - RISC-V 64-bit
# CS_MODE_LITTLE_ENDIAN - Little endian (default)
# CS_MODE_BIG_ENDIAN - Big endian
#
# Modes are bit flags: combine them with bitwise OR (|), not addition, so
# that repeating a flag can never change the resulting value.

# ARM 32-bit
md_arm = Cs(CS_ARCH_ARM, CS_MODE_ARM)
# ARM Thumb mode
md_thumb = Cs(CS_ARCH_ARM, CS_MODE_THUMB)
# AArch64
md_arm64 = Cs(CS_ARCH_ARM64, CS_MODE_ARM)
# MIPS 32-bit little-endian
md_mips = Cs(CS_ARCH_MIPS, CS_MODE_MIPS32 | CS_MODE_LITTLE_ENDIAN)
# MIPS 64-bit big-endian
md_mips64 = Cs(CS_ARCH_MIPS, CS_MODE_MIPS64 | CS_MODE_BIG_ENDIAN)
# RISC-V 64-bit
md_riscv = Cs(CS_ARCH_RISCV, CS_MODE_RISCV64)
Dettagli istruzione
from capstone import *
from capstone.x86 import *
CODE = b"\x48\x8b\x45\xf8\x48\x01\xc3\x0f\xbe\x00"
md = Cs(CS_ARCH_X86, CS_MODE_64)
# Detail mode must be on for regs_read / regs_write / groups to be populated
md.detail = True
for ins in md.disasm(CODE, 0x1000):
    # Summary line followed by the raw encoding
    print(f"0x{ins.address:x}: {ins.mnemonic} {ins.op_str}")
    print(f" Size: {ins.size} bytes")
    print(f" Bytes: {ins.bytes.hex()}")
    # Registers the instruction reads implicitly
    implicit_reads = ins.regs_read
    if implicit_reads:
        regs_read = list(map(ins.reg_name, implicit_reads))
        print(f" Reads: {regs_read}")
    # Registers the instruction writes implicitly
    implicit_writes = ins.regs_write
    if implicit_writes:
        regs_write = list(map(ins.reg_name, implicit_writes))
        print(f" Writes: {regs_write}")
    # Semantic groups (jump, call, ret, etc.)
    group_ids = ins.groups
    if group_ids:
        groups = list(map(ins.group_name, group_ids))
        print(f" Groups: {groups}")
Accesso operandi
from capstone import *
from capstone.x86 import *
CODE = b"\x48\x8b\x84\x24\x80\x00\x00\x00"  # mov rax, [rsp+0x80]
md = Cs(CS_ARCH_X86, CS_MODE_64)
md.detail = True  # operands are only available in detail mode
for insn in md.disasm(CODE, 0x1000):
    print(f"{insn.mnemonic} {insn.op_str}")
    for i, op in enumerate(insn.operands):
        if op.type == X86_OP_REG:
            print(f" Operand {i}: REG = {insn.reg_name(op.reg)}")
        elif op.type == X86_OP_IMM:
            # Immediates are signed: "#x" renders -8 as "-0x8", whereas a
            # hand-written "0x" prefix would produce the malformed "0x-8".
            print(f" Operand {i}: IMM = {op.imm:#x}")
        elif op.type == X86_OP_MEM:
            mem = op.mem
            print(f" Operand {i}: MEM")
            # Only print the addressing components that are actually present
            if mem.base:
                print(f" Base: {insn.reg_name(mem.base)}")
            if mem.index:
                print(f" Index: {insn.reg_name(mem.index)}")
            if mem.scale != 1:
                print(f" Scale: {mem.scale}")
            if mem.disp:
                # Displacements are signed too (e.g. [rbp-0x8])
                print(f" Disp: {mem.disp:#x}")
Dettagli operandi ARM
from capstone import *
from capstone.arm import *
CODE = b"\x04\xe0\x2d\xe5"  # push {lr}, encoded as str lr, [sp, #-4]!
md = Cs(CS_ARCH_ARM, CS_MODE_ARM)
md.detail = True  # operands are only available in detail mode
for insn in md.disasm(CODE, 0x1000):
    print(f"{insn.mnemonic} {insn.op_str}")
    for i, op in enumerate(insn.operands):
        if op.type == ARM_OP_REG:
            print(f" Operand {i}: REG = {insn.reg_name(op.reg)}")
        elif op.type == ARM_OP_IMM:
            # Immediates are signed: "#x" renders -4 as "-0x4", whereas a
            # hand-written "0x" prefix would produce the malformed "0x-4".
            print(f" Operand {i}: IMM = {op.imm:#x}")
        elif op.type == ARM_OP_MEM:
            print(f" Operand {i}: MEM base={insn.reg_name(op.mem.base)}")
Classificazione gruppi
from capstone import *
CODE = b"\xe8\x10\x00\x00\x00\xeb\x05\xc3\xff\xd0\x0f\x84\x05\x00\x00\x00"
md = Cs(CS_ARCH_X86, CS_MODE_64)
md.detail = True
# (group id, tag) pairs, tested in this order for every instruction
GROUP_TAGS = (
    (CS_GRP_JUMP, "JUMP"),
    (CS_GRP_CALL, "CALL"),
    (CS_GRP_RET, "RET"),
    (CS_GRP_INT, "INT"),
)
for ins in md.disasm(CODE, 0x1000):
    # Collect the tag of every group this instruction belongs to
    flags = [tag for group_id, tag in GROUP_TAGS if ins.group(group_id)]
    label = f" [{', '.join(flags)}]" if flags else ""
    print(f"0x{ins.address:x}: {ins.mnemonic} {ins.op_str}{label}")
Iterazione con disasm_lite
from capstone import *
CODE = b"\x55\x48\x89\xe5\x48\x83\xec\x10\xc9\xc3"
md = Cs(CS_ARCH_X86, CS_MODE_64)
# disasm_lite is the faster variant: it yields plain
# (address, size, mnemonic, op_str) tuples instead of instruction objects
for entry in md.disasm_lite(CODE, 0x1000):
    addr, _size, mnemonic, op_str = entry
    print(f"0x{addr:x}: {mnemonic}\t{op_str}")
API C
#include <capstone/capstone.h>
#include <inttypes.h>
#include <stdio.h>
/*
 * Disassemble a short x86-64 byte sequence and print one line per
 * instruction in the form "0x<address>:\t<mnemonic>\t<operands>".
 *
 * Returns -1 if the Capstone handle cannot be opened, 0 otherwise.
 */
int main(void) {
    csh handle;
    cs_insn *insn;
    size_t count;
    uint8_t code[] = {0x55, 0x48, 0x89, 0xe5, 0xc3};

    // Initialize for x86-64
    if (cs_open(CS_ARCH_X86, CS_MODE_64, &handle) != CS_ERR_OK)
        return -1;

    // Disassemble; a count argument of 0 asks for every instruction in the buffer
    count = cs_disasm(handle, code, sizeof(code), 0x1000, 0, &insn);
    if (count > 0) {
        for (size_t i = 0; i < count; i++) {
            // address is a uint64_t: PRIx64 is correct on every platform,
            // whereas %lx is wrong wherever long is 32 bits wide.
            printf("0x%" PRIx64 ":\t%s\t%s\n",
                   insn[i].address, insn[i].mnemonic, insn[i].op_str);
        }
        // insn was allocated by cs_disasm and must be released with cs_free
        cs_free(insn, count);
    } else {
        // Report the failure instead of exiting silently
        fprintf(stderr, "ERROR: failed to disassemble the given code\n");
    }

    cs_close(&handle);
    return 0;
}
# Compile C program (disasm.c) and link it against the Capstone library
gcc disasm.c -o disasm -lcapstone
Modalità skipdata
from capstone import *
# Code with three 0xff bytes embedded between valid instructions
MIXED = b"\x55\x48\x89\xe5\xff\xff\xff\xc9\xc3"
md = Cs(CS_ARCH_X86, CS_MODE_64)
# SKIPDATA mode: keep disassembling past bytes that do not decode,
# instead of stopping at the first invalid instruction
md.skipdata = True
# Skipped data is reported with the mnemonic "db"; no callback, no user data
md.skipdata_setup = ("db", None, None)
for ins in md.disasm(MIXED, 0x1000):
    addr, mnem, ops = ins.address, ins.mnemonic, ins.op_str
    print(f"0x{addr:x}: {mnem}\t{ops}")
Integrazione con Unicorn
from capstone import *
from unicorn import *
from unicorn.x86_const import *
# Disassemble what Unicorn emulates
def hook_code(uc, address, size, user_data):
    """Disassemble and print the bytes reported to a Unicorn code hook.

    Args:
        uc: Emulator object; only its ``mem_read(address, size)`` is used.
        address: Address of the bytes to read and disassemble.
        size: Number of bytes to read at ``address``.
        user_data: Disassembler object exposing ``disasm(code, address)``.
    """
    # Convert whatever mem_read returns into an immutable bytes object
    raw = bytes(uc.mem_read(address, size))
    for ins in user_data.disasm(raw, address):
        print(f">>> 0x{ins.address:x}: {ins.mnemonic} {ins.op_str}")
# Capstone disassembler and Unicorn emulator, both configured for x86 in 64-bit mode
md = Cs(CS_ARCH_X86, CS_MODE_64)
mu = Uc(UC_ARCH_X86, UC_MODE_64)
# Pass Capstone instance to Unicorn hook for disassembly
# (hook_code receives it as its user_data argument)
mu.hook_add(UC_HOOK_CODE, hook_code, user_data=md)