콘텐츠로 이동

Capstone Engine

Capstone은 머신 코드 디스어셈블리를 위한 깔끔한 API를 제공하는 경량 멀티 플랫폼 디스어셈블리 엔진입니다. Python, C, Java, Go 등의 바인딩과 함께 x86, ARM, AArch64, MIPS, PowerPC, SPARC, SystemZ 등을 지원합니다.

설치

# Python binding
pip install capstone

# From source (C library + Python)
git clone https://github.com/capstone-engine/capstone.git
cd capstone
mkdir build && cd build
cmake ..
make -j$(nproc)
sudo make install

# Debian/Ubuntu
sudo apt-get install libcapstone-dev python3-capstone

# macOS
brew install capstone
pip install capstone

Python API: Basic Disassembly

x86-64 Disassembly

from capstone import *

# Raw x86-64 machine code to decode
CODE = b"\x55\x48\x89\xe5\x48\x83\xec\x10\x89\x7d\xfc\xb8\x00\x00\x00\x00\xc9\xc3"

# Address reported for the first decoded instruction
BASE_ADDRESS = 0x1000

# Disassembler handle for x86 in 64-bit mode
disassembler = Cs(CS_ARCH_X86, CS_MODE_64)

# One output line per instruction: address, mnemonic, operand string
for insn in disassembler.disasm(CODE, BASE_ADDRESS):
    print(f"0x{insn.address:x}:\t{insn.mnemonic}\t{insn.op_str}")

x86 32-bit

from capstone import *

# Raw 32-bit x86 machine code to decode
CODE = b"\x55\x89\xe5\x83\xec\x08\xe8\x00\x00\x00\x00\x5d\xc9\xc3"

# Address reported for the first decoded instruction
BASE_ADDRESS = 0x8048000

# Same architecture constant as the 64-bit case; only the mode differs
disassembler = Cs(CS_ARCH_X86, CS_MODE_32)
for insn in disassembler.disasm(CODE, BASE_ADDRESS):
    print(f"0x{insn.address:x}:\t{insn.mnemonic}\t{insn.op_str}")

Architecture Support

from capstone import *

# Architecture constants (first argument to Cs)
# CS_ARCH_X86      - x86/x86-64
# CS_ARCH_ARM      - ARM (32-bit)
# CS_ARCH_ARM64    - AArch64
# CS_ARCH_MIPS     - MIPS
# CS_ARCH_PPC      - PowerPC
# CS_ARCH_SPARC    - SPARC
# CS_ARCH_SYSZ     - SystemZ
# CS_ARCH_XCORE    - XCore
# CS_ARCH_RISCV    - RISC-V

# Mode constants (second argument to Cs)
# CS_MODE_32       - 32-bit
# CS_MODE_64       - 64-bit
# CS_MODE_ARM      - ARM mode
# CS_MODE_THUMB    - Thumb mode
# CS_MODE_MIPS32   - MIPS 32-bit (used below)
# CS_MODE_MIPS64   - MIPS 64-bit (used below)
# CS_MODE_RISCV64  - RISC-V 64-bit (used below)
# CS_MODE_LITTLE_ENDIAN - Little endian (default)
# CS_MODE_BIG_ENDIAN    - Big endian

# ARM 32-bit
md_arm = Cs(CS_ARCH_ARM, CS_MODE_ARM)

# ARM Thumb mode
md_thumb = Cs(CS_ARCH_ARM, CS_MODE_THUMB)

# AArch64
# NOTE(review): newer Capstone releases may spell this architecture
# constant differently - confirm against the installed version.
md_arm64 = Cs(CS_ARCH_ARM64, CS_MODE_ARM)

# MIPS 32-bit little-endian (word size and endianness modes are combined)
md_mips = Cs(CS_ARCH_MIPS, CS_MODE_MIPS32 + CS_MODE_LITTLE_ENDIAN)

# MIPS 64-bit big-endian
md_mips64 = Cs(CS_ARCH_MIPS, CS_MODE_MIPS64 + CS_MODE_BIG_ENDIAN)

# RISC-V 64-bit
md_riscv = Cs(CS_ARCH_RISCV, CS_MODE_RISCV64)

Instruction Details

from capstone import *
from capstone.x86 import *

CODE = b"\x48\x8b\x45\xf8\x48\x01\xc3\x0f\xbe\x00"

# Address reported for the first decoded instruction
BASE_ADDRESS = 0x1000

disassembler = Cs(CS_ARCH_X86, CS_MODE_64)
# Detail mode must be on before disassembling so that register and group
# information is available on each instruction
disassembler.detail = True

for insn in disassembler.disasm(CODE, BASE_ADDRESS):
    # Basic facts: text form, encoded length, raw encoding
    print(f"0x{insn.address:x}: {insn.mnemonic} {insn.op_str}")
    print(f"  Size: {insn.size} bytes")
    print(f"  Bytes: {insn.bytes.hex()}")

    # Registers the instruction reads or writes implicitly, by name
    if insn.regs_read:
        regs_read = list(map(insn.reg_name, insn.regs_read))
        print(f"  Reads: {regs_read}")
    if insn.regs_write:
        regs_write = list(map(insn.reg_name, insn.regs_write))
        print(f"  Writes: {regs_write}")

    # Semantic groups the instruction belongs to (jump, call, ret, ...)
    if insn.groups:
        groups = list(map(insn.group_name, insn.groups))
        print(f"  Groups: {groups}")

Operand Access

from capstone import *
from capstone.x86 import *

CODE = b"\x48\x8b\x84\x24\x80\x00\x00\x00"  # mov rax, [rsp+0x80]

md = Cs(CS_ARCH_X86, CS_MODE_64)
md.detail = True  # operands are inspected below, so detail mode is enabled

for insn in md.disasm(CODE, 0x1000):
    print(f"{insn.mnemonic} {insn.op_str}")

    # Walk the operands and print each one according to its kind
    for i, op in enumerate(insn.operands):
        if op.type == X86_OP_REG:
            print(f"  Operand {i}: REG = {insn.reg_name(op.reg)}")
        elif op.type == X86_OP_IMM:
            # "#x" puts the sign before the 0x prefix, so a negative
            # immediate prints as "-0x8" instead of the malformed "0x-8"
            print(f"  Operand {i}: IMM = {op.imm:#x}")
        elif op.type == X86_OP_MEM:
            mem = op.mem
            print(f"  Operand {i}: MEM")
            # Only the components that are actually present are printed
            if mem.base:
                print(f"    Base: {insn.reg_name(mem.base)}")
            if mem.index:
                print(f"    Index: {insn.reg_name(mem.index)}")
            if mem.scale != 1:
                print(f"    Scale: {mem.scale}")
            if mem.disp:
                # Displacements are signed (e.g. [rbp-8]); format them the
                # same sign-aware way as immediates
                print(f"    Disp: {mem.disp:#x}")

ARM Operand Details

from capstone import *
from capstone.arm import *

# NOTE(review): Capstone may print this encoding as "str lr, [sp, #-4]!"
# rather than the "push {lr}" alias - confirm with the installed version.
CODE = b"\x04\xe0\x2d\xe5"  # push {lr}

md = Cs(CS_ARCH_ARM, CS_MODE_ARM)
md.detail = True  # operands are inspected below, so detail mode is enabled

for insn in md.disasm(CODE, 0x1000):
    print(f"{insn.mnemonic} {insn.op_str}")
    # Print each operand according to its kind (register, immediate, memory)
    for i, op in enumerate(insn.operands):
        if op.type == ARM_OP_REG:
            print(f"  Operand {i}: REG = {insn.reg_name(op.reg)}")
        elif op.type == ARM_OP_IMM:
            print(f"  Operand {i}: IMM = 0x{op.imm:x}")
        elif op.type == ARM_OP_MEM:
            # Only the base register of the memory operand is printed here
            print(f"  Operand {i}: MEM base={insn.reg_name(op.mem.base)}")

Group Classification

from capstone import *

CODE = b"\xe8\x10\x00\x00\x00\xeb\x05\xc3\xff\xd0\x0f\x84\x05\x00\x00\x00"

# Generic instruction groups to test for, paired with the tag printed for each
_GROUP_LABELS = (
    (CS_GRP_JUMP, "JUMP"),
    (CS_GRP_CALL, "CALL"),
    (CS_GRP_RET, "RET"),
    (CS_GRP_INT, "INT"),
)

disassembler = Cs(CS_ARCH_X86, CS_MODE_64)
disassembler.detail = True  # group membership is queried below

for insn in disassembler.disasm(CODE, 0x1000):
    # Collect the tag of every group this instruction belongs to
    flags = [tag for group_id, tag in _GROUP_LABELS if insn.group(group_id)]

    # Bracketed suffix only when at least one group matched
    if flags:
        label = f" [{', '.join(flags)}]"
    else:
        label = ""
    print(f"0x{insn.address:x}: {insn.mnemonic} {insn.op_str}{label}")

Iterating with disasm_lite

from capstone import *

CODE = b"\x55\x48\x89\xe5\x48\x83\xec\x10\xc9\xc3"

# Address reported for the first decoded instruction
BASE_ADDRESS = 0x1000

disassembler = Cs(CS_ARCH_X86, CS_MODE_64)

# disasm_lite yields plain (address, size, mnemonic, op_str) tuples rather
# than instruction objects, which makes it the faster option
for addr, _size, mnemonic, op_str in disassembler.disasm_lite(CODE, BASE_ADDRESS):
    print(f"0x{addr:x}: {mnemonic}\t{op_str}")

C API

#include <capstone/capstone.h>
#include <stdio.h>

/*
 * Disassemble a short x86-64 byte sequence and print one line per
 * instruction: address, mnemonic and operand string.
 *
 * Returns 0 on success, -1 if the Capstone handle cannot be opened or the
 * buffer cannot be disassembled.
 */
int main(void) {
    csh handle;
    cs_insn *insn;
    size_t count;
    int status = 0;

    const uint8_t code[] = {0x55, 0x48, 0x89, 0xe5, 0xc3};

    // Initialize for x86-64
    if (cs_open(CS_ARCH_X86, CS_MODE_64, &handle) != CS_ERR_OK) {
        fprintf(stderr, "ERROR: failed to initialize Capstone\n");
        return -1;
    }

    // Disassemble the whole buffer, reporting addresses from 0x1000
    count = cs_disasm(handle, code, sizeof(code), 0x1000, 0, &insn);
    if (count > 0) {
        for (size_t i = 0; i < count; i++) {
            // address is a 64-bit value; "%lx" is wrong wherever long is
            // 32 bits, so widen explicitly and print with "%llx"
            printf("0x%llx:\t%s\t%s\n",
                   (unsigned long long)insn[i].address,
                   insn[i].mnemonic, insn[i].op_str);
        }
        // Release the instruction array allocated by cs_disasm
        cs_free(insn, count);
    } else {
        // Nothing was decoded: report it instead of exiting silently
        fprintf(stderr, "ERROR: failed to disassemble given code\n");
        status = -1;
    }

    cs_close(&handle);
    return status;
}
# Compile C program
gcc disasm.c -o disasm -lcapstone

Skipdata Mode

from capstone import *

# Skipdata mode: keep disassembling past bytes that do not decode as
# instructions instead of stopping at the first invalid one
disassembler = Cs(CS_ARCH_X86, CS_MODE_64)
disassembler.skipdata = True
# Skipped bytes are reported under the "db" mnemonic; no callback or user data
disassembler.skipdata_setup = ("db", None, None)

# Code with non-instruction bytes embedded in the middle
MIXED = b"\x55\x48\x89\xe5\xff\xff\xff\xc9\xc3"
BASE_ADDRESS = 0x1000

for insn in disassembler.disasm(MIXED, BASE_ADDRESS):
    print(f"0x{insn.address:x}: {insn.mnemonic}\t{insn.op_str}")

Integration with Unicorn

from capstone import *
from unicorn import *
from unicorn.x86_const import *

# Disassemble what Unicorn emulates
def hook_code(uc, address, size, user_data):
    """Print the disassembly of the bytes at ``address``.

    Intended to be registered as a code hook (see the ``hook_add`` call
    that passes the disassembler as ``user_data``).

    Args:
        uc: Emulator object; only ``uc.mem_read(address, size)`` is used.
        address: Start of the bytes to read, also used as the base address
            for the disassembled output.
        size: Number of bytes to read.
        user_data: Disassembler exposing ``disasm(code, address)``.
    """
    disassembler = user_data
    # mem_read's result is converted to immutable bytes before decoding
    raw = bytes(uc.mem_read(address, size))
    for insn in disassembler.disasm(raw, address):
        print(f">>> 0x{insn.address:x}: {insn.mnemonic} {insn.op_str}")

# One Capstone disassembler and one Unicorn emulator, both set up for x86-64
md = Cs(CS_ARCH_X86, CS_MODE_64)
mu = Uc(UC_ARCH_X86, UC_MODE_64)
# Pass Capstone instance to Unicorn hook for disassembly
# NOTE: this snippet only registers the hook; it does not map memory, load
# code or start emulation, so nothing is printed until those steps are added.
mu.hook_add(UC_HOOK_CODE, hook_code, user_data=md)