Capstone Engine
Capstone은 머신 코드 디스어셈블리를 위한 깔끔한 API를 제공하는 경량 멀티 플랫폼 디스어셈블리 엔진입니다. Python, C, Java, Go 등의 바인딩과 함께 x86, ARM, AArch64, MIPS, PowerPC, SPARC, SystemZ 등을 지원합니다.
설치
# Python binding, installed from PyPI
pip install capstone
# From source (C library + Python): clone, then configure, build and install with CMake
# NOTE(review): only the CMake build is shown here; no separate Python binding step appears
git clone https://github.com/capstone-engine/capstone.git
cd capstone
mkdir build && cd build
cmake ..
make -j$(nproc)
sudo make install
# Debian/Ubuntu: development package plus the packaged Python 3 binding
sudo apt-get install libcapstone-dev python3-capstone
# macOS: Homebrew package, then the Python binding via pip
brew install capstone
pip install capstone
Python API: Basic Disassembly
x86-64 Disassembly
from capstone import *
# x86-64 machine code
CODE = b"\x55\x48\x89\xe5\x48\x83\xec\x10\x89\x7d\xfc\xb8\x00\x00\x00\x00\xc9\xc3"

# Create disassembler for x86-64
md = Cs(CS_ARCH_X86, CS_MODE_64)

# 0x1000 is the address assigned to the first decoded instruction
for insn in md.disasm(CODE, 0x1000):
    print(f"0x{insn.address:x}:\t{insn.mnemonic}\t{insn.op_str}")
x86 32-bit
from capstone import *
# x86 32-bit machine code
CODE = b"\x55\x89\xe5\x83\xec\x08\xe8\x00\x00\x00\x00\x5d\xc9\xc3"

# Same architecture constant as x86-64; only the mode differs
md = Cs(CS_ARCH_X86, CS_MODE_32)

# 0x8048000 is the address assigned to the first decoded instruction
for insn in md.disasm(CODE, 0x8048000):
    print(f"0x{insn.address:x}:\t{insn.mnemonic}\t{insn.op_str}")
Architecture Support
from capstone import *
# Architecture and mode constants
# CS_ARCH_X86 - x86/x86-64
# CS_ARCH_ARM - ARM (32-bit)
# CS_ARCH_ARM64 - AArch64
# CS_ARCH_MIPS - MIPS
# CS_ARCH_PPC - PowerPC
# CS_ARCH_SPARC - SPARC
# CS_ARCH_SYSZ - SystemZ
# CS_ARCH_XCORE - XCore
# CS_ARCH_RISCV - RISC-V
# Mode constants
# CS_MODE_32 - 32-bit
# CS_MODE_64 - 64-bit
# CS_MODE_ARM - ARM mode
# CS_MODE_THUMB - Thumb mode
# CS_MODE_MIPS32 - MIPS 32-bit
# CS_MODE_MIPS64 - MIPS 64-bit
# CS_MODE_RISCV64 - RISC-V 64-bit
# CS_MODE_LITTLE_ENDIAN - Little endian (default)
# CS_MODE_BIG_ENDIAN - Big endian
# One disassembler per architecture/mode pair.
# Mode values are bit flags, so they are combined with bitwise OR.

# 32-bit ARM, ARM instruction set
md_arm = Cs(CS_ARCH_ARM, CS_MODE_ARM)

# 32-bit ARM, Thumb instruction set
md_thumb = Cs(CS_ARCH_ARM, CS_MODE_THUMB)

# AArch64
md_arm64 = Cs(CS_ARCH_ARM64, CS_MODE_ARM)

# MIPS32, little-endian
md_mips = Cs(CS_ARCH_MIPS, CS_MODE_MIPS32 | CS_MODE_LITTLE_ENDIAN)

# MIPS64, big-endian
md_mips64 = Cs(CS_ARCH_MIPS, CS_MODE_MIPS64 | CS_MODE_BIG_ENDIAN)

# RISC-V, 64-bit
md_riscv = Cs(CS_ARCH_RISCV, CS_MODE_RISCV64)
Instruction Details
from capstone import *
from capstone.x86 import *
CODE = b"\x48\x8b\x45\xf8\x48\x01\xc3\x0f\xbe\x00"

md = Cs(CS_ARCH_X86, CS_MODE_64)
md.detail = True  # Enable detailed instruction info (needed for regs/groups below)

for insn in md.disasm(CODE, 0x1000):
    print(f"0x{insn.address:x}: {insn.mnemonic} {insn.op_str}")
    print(f" Size: {insn.size} bytes")
    print(f" Bytes: {insn.bytes.hex()}")

    # Implicit registers read/written
    if insn.regs_read:
        regs_read = [insn.reg_name(r) for r in insn.regs_read]
        print(f" Reads: {regs_read}")
    if insn.regs_write:
        regs_write = [insn.reg_name(r) for r in insn.regs_write]
        print(f" Writes: {regs_write}")

    # Groups (jump, call, ret, etc.)
    if insn.groups:
        groups = [insn.group_name(g) for g in insn.groups]
        print(f" Groups: {groups}")
Operand Access
from capstone import *
from capstone.x86 import *
CODE = b"\x48\x8b\x84\x24\x80\x00\x00\x00"  # mov rax, [rsp+0x80]

md = Cs(CS_ARCH_X86, CS_MODE_64)
md.detail = True  # operands are only populated in detail mode

for insn in md.disasm(CODE, 0x1000):
    print(f"{insn.mnemonic} {insn.op_str}")
    for i, op in enumerate(insn.operands):
        if op.type == X86_OP_REG:
            print(f" Operand {i}: REG = {insn.reg_name(op.reg)}")
        elif op.type == X86_OP_IMM:
            # '#x' keeps the sign in front of the prefix (-0x8, not 0x-8)
            print(f" Operand {i}: IMM = {op.imm:#x}")
        elif op.type == X86_OP_MEM:
            mem = op.mem
            print(f" Operand {i}: MEM")
            # Each component is printed only when it is present / non-default
            if mem.base:
                print(f" Base: {insn.reg_name(mem.base)}")
            if mem.index:
                print(f" Index: {insn.reg_name(mem.index)}")
            if mem.scale != 1:
                print(f" Scale: {mem.scale}")
            if mem.disp:
                # Displacements are signed; '#x' renders negatives as -0x...
                print(f" Disp: {mem.disp:#x}")
ARM Operand Details
from capstone import *
from capstone.arm import *
# push {lr}; the bytes encode str lr, [sp, #-4]!, and Capstone may print that form instead
CODE = b"\x04\xe0\x2d\xe5"

md = Cs(CS_ARCH_ARM, CS_MODE_ARM)
md.detail = True  # operands are only populated in detail mode

for insn in md.disasm(CODE, 0x1000):
    print(f"{insn.mnemonic} {insn.op_str}")
    for i, op in enumerate(insn.operands):
        if op.type == ARM_OP_REG:
            print(f" Operand {i}: REG = {insn.reg_name(op.reg)}")
        elif op.type == ARM_OP_IMM:
            # '#x' keeps the sign in front of the prefix (-0x4, not 0x-4)
            print(f" Operand {i}: IMM = {op.imm:#x}")
        elif op.type == ARM_OP_MEM:
            print(f" Operand {i}: MEM base={insn.reg_name(op.mem.base)}")
Group Classification
from capstone import *
CODE = b"\xe8\x10\x00\x00\x00\xeb\x05\xc3\xff\xd0\x0f\x84\x05\x00\x00\x00"

md = Cs(CS_ARCH_X86, CS_MODE_64)
md.detail = True  # group membership is only available in detail mode

# (group id, label) pairs checked for every instruction, in output order
GROUP_LABELS = (
    (CS_GRP_JUMP, "JUMP"),
    (CS_GRP_CALL, "CALL"),
    (CS_GRP_RET, "RET"),
    (CS_GRP_INT, "INT"),
)

for insn in md.disasm(CODE, 0x1000):
    # Check instruction groups
    flags = [name for group_id, name in GROUP_LABELS if insn.group(group_id)]
    label = f" [{', '.join(flags)}]" if flags else ""
    print(f"0x{insn.address:x}: {insn.mnemonic} {insn.op_str}{label}")
Iterating with disasm_lite
from capstone import *
CODE = b"\x55\x48\x89\xe5\x48\x83\xec\x10\xc9\xc3"

md = Cs(CS_ARCH_X86, CS_MODE_64)

# disasm_lite is faster (returns tuples instead of objects)
for addr, size, mnemonic, op_str in md.disasm_lite(CODE, 0x1000):
    print(f"0x{addr:x}: {mnemonic}\t{op_str}")
C API
#include <capstone/capstone.h>
#include <stdio.h>
/*
 * Disassemble a short x86-64 byte sequence and print one line per
 * instruction. Returns -1 if the Capstone handle cannot be opened,
 * 0 otherwise.
 */
int main(void) {
    csh handle;
    cs_insn *insn;
    size_t count;
    uint8_t code[] = {0x55, 0x48, 0x89, 0xe5, 0xc3};

    // Initialize for x86-64
    if (cs_open(CS_ARCH_X86, CS_MODE_64, &handle) != CS_ERR_OK)
        return -1;

    // Disassemble; a count argument of 0 asks for every instruction in the buffer
    count = cs_disasm(handle, code, sizeof(code), 0x1000, 0, &insn);
    if (count > 0) {
        for (size_t i = 0; i < count; i++) {
            // address is a 64-bit value; the cast keeps the format
            // specifier correct where long is only 32 bits wide
            printf("0x%llx:\t%s\t%s\n",
                   (unsigned long long)insn[i].address,
                   insn[i].mnemonic, insn[i].op_str);
        }
        cs_free(insn, count);
    } else {
        // Nothing decoded: report it instead of exiting silently
        fprintf(stderr, "ERROR: failed to disassemble given code\n");
    }

    cs_close(&handle);
    return 0;
}
# Compile the C program (assumes it is saved as disasm.c) and link against libcapstone
gcc disasm.c -o disasm -lcapstone
Skipdata Mode
from capstone import *
# Handle data mixed with code (don't stop on invalid instructions)
md = Cs(CS_ARCH_X86, CS_MODE_64)
md.skipdata = True
# Tuple is (mnemonic, callback, user_data); skipped data is labelled "db"
md.skipdata_setup = ("db", None, None)

MIXED = b"\x55\x48\x89\xe5\xff\xff\xff\xc9\xc3"

for insn in md.disasm(MIXED, 0x1000):
    print(f"0x{insn.address:x}: {insn.mnemonic}\t{insn.op_str}")
Integration with Unicorn
from capstone import *
from unicorn import *
from unicorn.x86_const import *
# Disassemble what Unicorn emulates
def hook_code(uc, address, size, user_data):
    """Print the disassembly of the byte range reported to this hook.

    Args:
        uc: Object whose ``mem_read(address, size)`` returns the raw bytes.
        address: Start address of the range; also used as the address of
            the first disassembled instruction.
        size: Number of bytes to read starting at ``address``.
        user_data: Disassembler object exposing ``disasm(code, address)``.
    """
    md = user_data
    # Convert the buffer returned by mem_read to immutable bytes before decoding
    code = bytes(uc.mem_read(address, size))
    for insn in md.disasm(code, address):
        print(f">>> 0x{insn.address:x}: {insn.mnemonic} {insn.op_str}")
# One Capstone disassembler and one Unicorn emulator, both configured for x86-64
md = Cs(CS_ARCH_X86, CS_MODE_64)
mu = Uc(UC_ARCH_X86, UC_MODE_64)
# Pass Capstone instance to Unicorn hook for disassembly
# NOTE: this snippet only registers the hook; mapping memory and starting emulation are not shown
mu.hook_add(UC_HOOK_CODE, hook_code, user_data=md)