Zum Inhalt springen

Capstone Engine

Capstone ist eine leichtgewichtige, plattformübergreifende Disassemblierungs-Engine mit einer sauberen API zum Disassemblieren von Maschinencode. Sie unterstützt x86, ARM, AArch64, MIPS, PowerPC, SPARC, SystemZ und weitere Architekturen und bietet Bindings für Python, C, Java, Go und andere Sprachen.

Installation

# Python binding
pip install capstone

# From source (C library + Python)
git clone https://github.com/capstone-engine/capstone.git
cd capstone
mkdir build && cd build
cmake ..
make -j$(nproc)
sudo make install

# Debian/Ubuntu
sudo apt-get install libcapstone-dev python3-capstone

# macOS
brew install capstone
pip install capstone

Python-API: Grundlegende Disassemblierung

x86-64-Disassemblierung

from capstone import *

# x86-64 machine code: a minimal function prologue and epilogue
CODE = (
    b"\x55"                  # push rbp
    b"\x48\x89\xe5"          # mov rbp, rsp
    b"\x48\x83\xec\x10"      # sub rsp, 0x10
    b"\x89\x7d\xfc"          # mov dword ptr [rbp - 4], edi
    b"\xb8\x00\x00\x00\x00"  # mov eax, 0
    b"\xc9"                  # leave
    b"\xc3"                  # ret
)

# Disassembler handle for the x86 architecture in 64-bit mode
md = Cs(CS_ARCH_X86, CS_MODE_64)

# The second argument is the address assigned to the first instruction
for instruction in md.disasm(CODE, 0x1000):
    line = f"0x{instruction.address:x}:\t{instruction.mnemonic}\t{instruction.op_str}"
    print(line)

x86 32-bit

from capstone import *

# 32-bit x86 machine code, split per instruction for readability
CODE = (
    b"\x55"                  # push ebp
    b"\x89\xe5"              # mov ebp, esp
    b"\x83\xec\x08"          # sub esp, 8
    b"\xe8\x00\x00\x00\x00"  # call with a zero rel32 (targets the next instruction)
    b"\x5d"                  # pop ebp
    b"\xc9"                  # leave
    b"\xc3"                  # ret
)

# Same architecture constant as for x86-64; only the mode differs
md = Cs(CS_ARCH_X86, CS_MODE_32)
for instruction in md.disasm(CODE, 0x8048000):
    line = f"0x{instruction.address:x}:\t{instruction.mnemonic}\t{instruction.op_str}"
    print(line)

Architektur-Unterstützung

from capstone import *

# Architecture constants (first argument to Cs)
# CS_ARCH_X86      - x86/x86-64
# CS_ARCH_ARM      - ARM (32-bit)
# CS_ARCH_ARM64    - AArch64
# CS_ARCH_MIPS     - MIPS
# CS_ARCH_PPC      - PowerPC
# CS_ARCH_SPARC    - SPARC
# CS_ARCH_SYSZ     - SystemZ
# CS_ARCH_XCORE    - XCore
# CS_ARCH_RISCV    - RISC-V

# Mode constants (second argument to Cs)
# CS_MODE_32       - 32-bit
# CS_MODE_64       - 64-bit
# CS_MODE_ARM      - ARM mode
# CS_MODE_THUMB    - Thumb mode
# CS_MODE_MIPS32   - MIPS32
# CS_MODE_MIPS64   - MIPS64
# CS_MODE_RISCV64  - RISC-V 64-bit
# CS_MODE_LITTLE_ENDIAN - Little endian (default)
# CS_MODE_BIG_ENDIAN    - Big endian
#
# Modes are bit flags: combine them with bitwise OR (|), not with +.
# Addition only happens to give the same value while the flags do not overlap.

# ARM 32-bit
md_arm = Cs(CS_ARCH_ARM, CS_MODE_ARM)

# ARM Thumb mode
md_thumb = Cs(CS_ARCH_ARM, CS_MODE_THUMB)

# AArch64 (takes no ARM/Thumb distinction; CS_MODE_ARM is the customary value here)
md_arm64 = Cs(CS_ARCH_ARM64, CS_MODE_ARM)

# MIPS 32-bit little-endian
md_mips = Cs(CS_ARCH_MIPS, CS_MODE_MIPS32 | CS_MODE_LITTLE_ENDIAN)

# MIPS 64-bit big-endian
md_mips64 = Cs(CS_ARCH_MIPS, CS_MODE_MIPS64 | CS_MODE_BIG_ENDIAN)

# RISC-V 64-bit
md_riscv = Cs(CS_ARCH_RISCV, CS_MODE_RISCV64)

Instruktionsdetails

from capstone import *
from capstone.x86 import *

CODE = b"\x48\x8b\x45\xf8\x48\x01\xc3\x0f\xbe\x00"

md = Cs(CS_ARCH_X86, CS_MODE_64)
# Detail mode has to be on before regs_read / regs_write / groups are queried
md.detail = True

for insn in md.disasm(CODE, 0x1000):
    print(f"0x{insn.address:x}: {insn.mnemonic} {insn.op_str}")
    print(f"  Size: {insn.size} bytes")
    print(f"  Bytes: {insn.bytes.hex()}")

    # Each row: printed label and the resolved names for that category.
    # Reads/Writes are the implicitly accessed registers; Groups covers
    # classifications such as jump, call, ret.
    sections = (
        ("Reads", [insn.reg_name(reg_id) for reg_id in insn.regs_read]),
        ("Writes", [insn.reg_name(reg_id) for reg_id in insn.regs_write]),
        ("Groups", [insn.group_name(group_id) for group_id in insn.groups]),
    )
    for label, names in sections:
        # Empty categories are omitted from the output entirely
        if names:
            print(f"  {label}: {names}")

Operandenzugriff

from capstone import *
from capstone.x86 import *

CODE = b"\x48\x8b\x84\x24\x80\x00\x00\x00"  # mov rax, [rsp+0x80]

md = Cs(CS_ARCH_X86, CS_MODE_64)
md.detail = True  # operand data is only populated in detail mode

for insn in md.disasm(CODE, 0x1000):
    print(f"{insn.mnemonic} {insn.op_str}")

    for i, op in enumerate(insn.operands):
        if op.type == X86_OP_REG:
            print(f"  Operand {i}: REG = {insn.reg_name(op.reg)}")
        elif op.type == X86_OP_IMM:
            # Immediates are signed: "#x" puts the sign before the prefix
            # (-0x8), whereas a hand-written "0x" prefix would give "0x-8".
            print(f"  Operand {i}: IMM = {op.imm:#x}")
        elif op.type == X86_OP_MEM:
            mem = op.mem
            print(f"  Operand {i}: MEM")
            # A register id of 0 means "no register", so absent parts are skipped
            if mem.base:
                print(f"    Base: {insn.reg_name(mem.base)}")
            if mem.index:
                print(f"    Index: {insn.reg_name(mem.index)}")
            if mem.scale != 1:
                print(f"    Scale: {mem.scale}")
            if mem.disp:
                # Displacements are signed too (e.g. [rbp - 8]); see IMM above
                print(f"    Disp: {mem.disp:#x}")

ARM-Operandendetails

from capstone import *
from capstone.arm import *

CODE = b"\x04\xe0\x2d\xe5"  # push {lr}

md = Cs(CS_ARCH_ARM, CS_MODE_ARM)
md.detail = True  # operand data is only populated in detail mode

for insn in md.disasm(CODE, 0x1000):
    print(f"{insn.mnemonic} {insn.op_str}")
    for i, op in enumerate(insn.operands):
        if op.type == ARM_OP_REG:
            print(f"  Operand {i}: REG = {insn.reg_name(op.reg)}")
        elif op.type == ARM_OP_IMM:
            # Immediates are signed: "#x" puts the sign before the prefix
            # (-0x4), whereas a hand-written "0x" prefix would give "0x-4".
            print(f"  Operand {i}: IMM = {op.imm:#x}")
        elif op.type == ARM_OP_MEM:
            print(f"  Operand {i}: MEM base={insn.reg_name(op.mem.base)}")

Gruppenklassifizierung

from capstone import *

CODE = b"\xe8\x10\x00\x00\x00\xeb\x05\xc3\xff\xd0\x0f\x84\x05\x00\x00\x00"

md = Cs(CS_ARCH_X86, CS_MODE_64)
md.detail = True

# Generic group ids paired with the tag printed for them, in output order
GROUP_TAGS = (
    (CS_GRP_JUMP, "JUMP"),
    (CS_GRP_CALL, "CALL"),
    (CS_GRP_RET, "RET"),
    (CS_GRP_INT, "INT"),
)

for insn in md.disasm(CODE, 0x1000):
    # Collect the tag of every group this instruction belongs to
    tags = [tag for group_id, tag in GROUP_TAGS if insn.group(group_id)]

    # Instructions outside all four groups get no bracketed suffix
    suffix = ""
    if tags:
        suffix = f" [{', '.join(tags)}]"
    print(f"0x{insn.address:x}: {insn.mnemonic} {insn.op_str}{suffix}")

Iteration mit disasm_lite

from capstone import *

CODE = b"\x55\x48\x89\xe5\x48\x83\xec\x10\xc9\xc3"

md = Cs(CS_ARCH_X86, CS_MODE_64)

# disasm_lite yields plain (address, size, mnemonic, op_str) tuples rather
# than instruction objects, which makes it the faster way to iterate
for entry in md.disasm_lite(CODE, 0x1000):
    addr, _size, mnemonic, op_str = entry
    print(f"0x{addr:x}: {mnemonic}\t{op_str}")

C-API

#include <inttypes.h>
#include <stdio.h>

#include <capstone/capstone.h>

// Disassemble a short x86-64 byte sequence and print one line per
// instruction. Returns -1 if the Capstone handle cannot be opened,
// 0 otherwise (also when nothing could be decoded).
int main(void) {
    csh handle;
    cs_insn *insn;
    size_t count;

    uint8_t code[] = {0x55, 0x48, 0x89, 0xe5, 0xc3};

    // Initialize for x86-64
    if (cs_open(CS_ARCH_X86, CS_MODE_64, &handle) != CS_ERR_OK)
        return -1;

    // Disassemble; a count argument of 0 means "decode as many as possible"
    count = cs_disasm(handle, code, sizeof(code), 0x1000, 0, &insn);
    if (count > 0) {
        for (size_t i = 0; i < count; i++) {
            // address is a uint64_t: PRIx64 is correct on every platform,
            // while %lx is wrong wherever long is 32 bits (e.g. Windows).
            printf("0x%" PRIx64 ":\t%s\t%s\n",
                   insn[i].address, insn[i].mnemonic, insn[i].op_str);
        }
        // The instruction array is allocated by cs_disasm and must be freed
        cs_free(insn, count);
    }

    cs_close(&handle);
    return 0;
}
# Compile C program
gcc disasm.c -o disasm -lcapstone

Skipdata-Modus

from capstone import *

# Code interleaved with data: keep decoding past bytes that are not valid
# instructions instead of stopping at the first one
md = Cs(CS_ARCH_X86, CS_MODE_64)
md.skipdata = True
# Tuple layout is (mnemonic, callback, user_data); skipped bytes show up as "db"
md.skipdata_setup = ("db", None, None)

MIXED = b"\x55\x48\x89\xe5\xff\xff\xff\xc9\xc3"
for item in md.disasm(MIXED, 0x1000):
    line = f"0x{item.address:x}: {item.mnemonic}\t{item.op_str}"
    print(line)

Integration mit Unicorn

from capstone import *
from unicorn import *
from unicorn.x86_const import *

# Disassemble what Unicorn emulates
def hook_code(uc, address, size, user_data):
    """Print the disassembly of the bytes at ``address``.

    Reads ``size`` bytes from ``uc`` at ``address`` and prints one line for
    each instruction the disassembler yields for them.

    Args:
        uc: Object providing ``mem_read(address, size)``.
        address: Start address of the bytes to read and disassemble.
        size: Number of bytes to read.
        user_data: Disassembler providing ``disasm(code, address)``.
    """
    disassembler = user_data
    # disasm is handed an immutable bytes copy of whatever mem_read returns
    raw = bytes(uc.mem_read(address, size))
    for decoded in disassembler.disasm(raw, address):
        print(f">>> 0x{decoded.address:x}: {decoded.mnemonic} {decoded.op_str}")

# Capstone disassembler and Unicorn emulator, both configured for x86-64
md = Cs(CS_ARCH_X86, CS_MODE_64)
mu = Uc(UC_ARCH_X86, UC_MODE_64)
# Pass Capstone instance to Unicorn hook for disassembly
# NOTE(review): this snippet only registers the hook; it maps no memory and
# never starts emulation, so hook_code is not invoked by the lines shown here.
mu.hook_add(UC_HOOK_CODE, hook_code, user_data=md)