Commandes Capstone
Capstone est un moteur de désassemblage léger et multiplateforme qui fournit une API propre et uniforme pour le désassemblage sur de nombreuses architectures, dont x86, ARM, MIPS, PowerPC et bien d'autres.
Installation
# Python binding
pip install capstone
# From source (C library + Python)
git clone https://github.com/capstone-engine/capstone.git
cd capstone
mkdir build && cd build
cmake ..
make -j$(nproc)
sudo make install
# Debian/Ubuntu
sudo apt-get install libcapstone-dev python3-capstone
# macOS
brew install capstone
pip install capstone
API Python : désassemblage de base
Désassemblage x86-64
from capstone import *
# Raw x86-64 machine code to decode
CODE = b"\x55\x48\x89\xe5\x48\x83\xec\x10\x89\x7d\xfc\xb8\x00\x00\x00\x00\xc9\xc3"
# Disassembler handle for the x86 architecture in 64-bit mode
md = Cs(CS_ARCH_X86, CS_MODE_64)
# The second argument of disasm() is the address given to the first instruction
for insn in md.disasm(CODE, 0x1000):
    print(f"0x{insn.address:x}:\t{insn.mnemonic}\t{insn.op_str}")
Désassemblage x86 32 bits
from capstone import *
# Raw 32-bit x86 machine code to decode
CODE = b"\x55\x89\xe5\x83\xec\x08\xe8\x00\x00\x00\x00\x5d\xc9\xc3"
# Same architecture constant as x86-64; only the mode selects 32-bit decoding
md = Cs(CS_ARCH_X86, CS_MODE_32)
# The second argument of disasm() is the address given to the first instruction
for insn in md.disasm(CODE, 0x8048000):
    print(f"0x{insn.address:x}:\t{insn.mnemonic}\t{insn.op_str}")
Support des architectures
from capstone import *
# Architecture constants (first argument of Cs)
# CS_ARCH_X86 - x86/x86-64
# CS_ARCH_ARM - ARM (32-bit)
# CS_ARCH_ARM64 - AArch64
# CS_ARCH_MIPS - MIPS
# CS_ARCH_PPC - PowerPC
# CS_ARCH_SPARC - SPARC
# CS_ARCH_SYSZ - SystemZ
# CS_ARCH_XCORE - XCore
# CS_ARCH_RISCV - RISC-V
# Mode constants (second argument of Cs; flags can be combined)
# CS_MODE_32 - 32-bit
# CS_MODE_64 - 64-bit
# CS_MODE_ARM - ARM mode
# CS_MODE_THUMB - Thumb mode
# CS_MODE_LITTLE_ENDIAN - Little endian (default)
# CS_MODE_BIG_ENDIAN - Big endian
# 32-bit ARM, ARM instruction set
md_arm = Cs(CS_ARCH_ARM, CS_MODE_ARM)
# 32-bit ARM, Thumb instruction set
md_thumb = Cs(CS_ARCH_ARM, CS_MODE_THUMB)
# AArch64
md_arm64 = Cs(CS_ARCH_ARM64, CS_MODE_ARM)
# MIPS, 32-bit, little-endian
md_mips = Cs(CS_ARCH_MIPS, CS_MODE_MIPS32 | CS_MODE_LITTLE_ENDIAN)
# MIPS, 64-bit, big-endian
md_mips64 = Cs(CS_ARCH_MIPS, CS_MODE_MIPS64 | CS_MODE_BIG_ENDIAN)
# RISC-V, 64-bit
md_riscv = Cs(CS_ARCH_RISCV, CS_MODE_RISCV64)
Détails des instructions
from capstone import *
from capstone.x86 import *
CODE = b"\x48\x8b\x45\xf8\x48\x01\xc3\x0f\xbe\x00"
md = Cs(CS_ARCH_X86, CS_MODE_64)
# Detail mode must be on before regs_read, regs_write and groups can be queried
md.detail = True
for insn in md.disasm(CODE, 0x1000):
    print(f"0x{insn.address:x}: {insn.mnemonic} {insn.op_str}")
    print(f" Size: {insn.size} bytes")
    print(f" Bytes: {insn.bytes.hex()}")
    # Implicit registers read/written (not the explicit operands)
    if insn.regs_read:
        regs_read = [insn.reg_name(r) for r in insn.regs_read]
        print(f" Reads: {regs_read}")
    if insn.regs_write:
        regs_write = [insn.reg_name(r) for r in insn.regs_write]
        print(f" Writes: {regs_write}")
    # Groups (jump, call, ret, etc.)
    if insn.groups:
        groups = [insn.group_name(g) for g in insn.groups]
        print(f" Groups: {groups}")
Accès aux opérandes
from capstone import *
from capstone.x86 import *
CODE = b"\x48\x8b\x84\x24\x80\x00\x00\x00"  # mov rax, [rsp+0x80]
md = Cs(CS_ARCH_X86, CS_MODE_64)
md.detail = True  # operands are only populated in detail mode
for insn in md.disasm(CODE, 0x1000):
    print(f"{insn.mnemonic} {insn.op_str}")
    for i, op in enumerate(insn.operands):
        if op.type == X86_OP_REG:
            print(f" Operand {i}: REG = {insn.reg_name(op.reg)}")
        elif op.type == X86_OP_IMM:
            # "#x" keeps the sign in front of the prefix (-0x1), unlike "0x{...:x}" which yields 0x-1
            print(f" Operand {i}: IMM = {op.imm:#x}")
        elif op.type == X86_OP_MEM:
            mem = op.mem
            print(f" Operand {i}: MEM")
            # A zero base/index/disp means that component is absent from the address
            if mem.base:
                print(f" Base: {insn.reg_name(mem.base)}")
            if mem.index:
                print(f" Index: {insn.reg_name(mem.index)}")
            if mem.scale != 1:
                print(f" Scale: {mem.scale}")
            if mem.disp:
                # Displacements are signed (e.g. [rbp-8]); "#x" prints -0x8 rather than 0x-8
                print(f" Disp: {mem.disp:#x}")
Détails des opérandes ARM
from capstone import *
from capstone.arm import *
CODE = b"\x04\xe0\x2d\xe5"  # push {lr} (encoded as str lr, [sp, #-4]!)
md = Cs(CS_ARCH_ARM, CS_MODE_ARM)
md.detail = True  # operands are only populated in detail mode
for insn in md.disasm(CODE, 0x1000):
    print(f"{insn.mnemonic} {insn.op_str}")
    for i, op in enumerate(insn.operands):
        if op.type == ARM_OP_REG:
            print(f" Operand {i}: REG = {insn.reg_name(op.reg)}")
        elif op.type == ARM_OP_IMM:
            # "#x" keeps the sign in front of the prefix (-0x4), unlike "0x{...:x}" which yields 0x-4
            print(f" Operand {i}: IMM = {op.imm:#x}")
        elif op.type == ARM_OP_MEM:
            print(f" Operand {i}: MEM base={insn.reg_name(op.mem.base)}")
Classification de groupes
from capstone import *
CODE = b"\xe8\x10\x00\x00\x00\xeb\x05\xc3\xff\xd0\x0f\x84\x05\x00\x00\x00"
md = Cs(CS_ARCH_X86, CS_MODE_64)
md.detail = True  # group membership is only available in detail mode
# Generic instruction groups paired with the tag printed for each, in output order
GROUP_TAGS = (
    (CS_GRP_JUMP, "JUMP"),
    (CS_GRP_CALL, "CALL"),
    (CS_GRP_RET, "RET"),
    (CS_GRP_INT, "INT"),
)
for insn in md.disasm(CODE, 0x1000):
    # Collect the tag of every group this instruction belongs to
    flags = [tag for group_id, tag in GROUP_TAGS if insn.group(group_id)]
    label = f" [{', '.join(flags)}]" if flags else ""
    print(f"0x{insn.address:x}: {insn.mnemonic} {insn.op_str}{label}")
Itération avec disasm_lite
from capstone import *
CODE = b"\x55\x48\x89\xe5\x48\x83\xec\x10\xc9\xc3"
md = Cs(CS_ARCH_X86, CS_MODE_64)
# disasm_lite is faster: it yields (address, size, mnemonic, op_str) tuples instead of objects
for addr, size, mnemonic, op_str in md.disasm_lite(CODE, 0x1000):
    print(f"0x{addr:x}: {mnemonic}\t{op_str}")
API C
#include <inttypes.h>
#include <stdio.h>
#include <capstone/capstone.h>
// Disassemble a short x86-64 byte sequence and print one instruction per line.
// Returns 0 on success, -1 if the Capstone handle cannot be opened,
// and 1 if no instruction could be decoded.
int main(void) {
    csh handle;
    cs_insn *insn;
    size_t count;
    int status = 0;
    uint8_t code[] = {0x55, 0x48, 0x89, 0xe5, 0xc3};

    // Initialize for x86-64
    if (cs_open(CS_ARCH_X86, CS_MODE_64, &handle) != CS_ERR_OK)
        return -1;

    // Disassemble the whole buffer (count 0 = no limit); first instruction is at 0x1000
    count = cs_disasm(handle, code, sizeof(code), 0x1000, 0, &insn);
    if (count > 0) {
        for (size_t i = 0; i < count; i++) {
            // address is a uint64_t: PRIx64 is the portable conversion,
            // whereas %lx is wrong on platforms where long is 32 bits
            printf("0x%" PRIx64 ":\t%s\t%s\n",
                   insn[i].address, insn[i].mnemonic, insn[i].op_str);
        }
        cs_free(insn, count);
    } else {
        // Report the failure instead of exiting silently with a success status
        fprintf(stderr, "disassembly failed: %s\n",
                cs_strerror(cs_errno(handle)));
        status = 1;
    }

    cs_close(&handle);
    return status;
}
# Compile C program
gcc disasm.c -o disasm -lcapstone
Mode Skipdata
from capstone import *
# Handle data mixed with code: keep decoding past invalid bytes instead of stopping there
md = Cs(CS_ARCH_X86, CS_MODE_64)
md.skipdata = True
# Tuple is (mnemonic, callback, user_data); skipped bytes are labelled "db"
md.skipdata_setup = ("db", None, None)
MIXED = b"\x55\x48\x89\xe5\xff\xff\xff\xc9\xc3"
for insn in md.disasm(MIXED, 0x1000):
    print(f"0x{insn.address:x}: {insn.mnemonic}\t{insn.op_str}")
Intégration avec Unicorn
from capstone import *
from unicorn import *
from unicorn.x86_const import *
# Disassemble what Unicorn emulates
def hook_code(uc, address, size, user_data):
    """Print the disassembly of the bytes at ``address`` as a code hook.

    Args:
        uc: Emulator object; only its ``mem_read(address, size)`` is used.
        address: Start address of the bytes to read and disassemble.
        size: Number of bytes to read from ``address``.
        user_data: Disassembler object exposing ``disasm(code, address)``
            (the Capstone ``Cs`` instance registered with the hook).
    """
    code = uc.mem_read(address, size)
    md = user_data
    # Convert to an immutable bytes object before handing it to the disassembler
    for insn in md.disasm(bytes(code), address):
        print(f">>> 0x{insn.address:x}: {insn.mnemonic} {insn.op_str}")
# Emulator and disassembler are configured for the same architecture/mode (x86, 64-bit)
mu = Uc(UC_ARCH_X86, UC_MODE_64)
md = Cs(CS_ARCH_X86, CS_MODE_64)
# Register hook_code as a code hook; the Capstone instance reaches it as user_data
mu.hook_add(UC_HOOK_CODE, hook_code, user_data=md)