Zum Inhalt springen

Keystone Engine

Keystone ist eine leichtgewichtige Multi-Architektur-Assembler-Engine. Sie bietet eine einfache API zum Assemblieren von Instruktionen für x86, ARM, AArch64, MIPS, PowerPC, SPARC und andere Architekturen und wird häufig zusammen mit Capstone (Disassembler) und Unicorn (Emulator) in Workflows zur Binäranalyse verwendet.

Installation

# Python binding
pip install keystone-engine

# From source
git clone https://github.com/keystone-engine/keystone.git
cd keystone
mkdir build && cd build
cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON ..
make -j$(nproc)
sudo make install

# Update linker cache
sudo ldconfig

# Install Python binding from source
cd ../bindings/python
pip install .

# macOS
brew install keystone
pip install keystone-engine

Python API: Basic Assembly

x86-64 Assembly

from keystone import *

# Create assembler for x86-64
ks = Ks(KS_ARCH_X86, KS_MODE_64)

# Assemble a single instruction
encoding, count = ks.asm("mov rax, rbx")
print(f"Bytes: {bytes(encoding).hex()}")
print(f"Instructions assembled: {count}")

# Assemble multiple instructions (semicolon-separated)
encoding, count = ks.asm("push rbp; mov rbp, rsp; sub rsp, 0x20; pop rbp; ret")
print(f"Bytes: {bytes(encoding).hex()}")
print(f"Instructions: {count}")

x86 32-bit

from keystone import *

ks = Ks(KS_ARCH_X86, KS_MODE_32)
encoding, count = ks.asm("push ebp; mov ebp, esp; xor eax, eax; pop ebp; ret")
print(f"Shellcode: {bytes(encoding).hex()}")

ARM Assembly

from keystone import *

# ARM 32-bit
ks_arm = Ks(KS_ARCH_ARM, KS_MODE_ARM)
encoding, count = ks_arm.asm("mov r0, #5; mov r1, #10; add r2, r0, r1; bx lr")
print(f"ARM: {bytes(encoding).hex()}")

# ARM Thumb mode
ks_thumb = Ks(KS_ARCH_ARM, KS_MODE_THUMB)
encoding, count = ks_thumb.asm("movs r0, #5; movs r1, #10; adds r2, r0, r1")
print(f"Thumb: {bytes(encoding).hex()}")

# AArch64
ks_arm64 = Ks(KS_ARCH_ARM64, KS_MODE_LITTLE_ENDIAN)
encoding, count = ks_arm64.asm("mov x0, #5; mov x1, #10; add x2, x0, x1; ret")
print(f"AArch64: {bytes(encoding).hex()}")

Architektur-Unterstützung

from keystone import *

# Architecture and mode constants
# KS_ARCH_X86      - x86/x86-64
# KS_ARCH_ARM      - ARM (32-bit)
# KS_ARCH_ARM64    - AArch64
# KS_ARCH_MIPS     - MIPS
# KS_ARCH_PPC      - PowerPC
# KS_ARCH_SPARC    - SPARC
# KS_ARCH_SYSTEMZ  - SystemZ
# KS_ARCH_HEXAGON  - Hexagon
# KS_ARCH_EVM      - Ethereum VM

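# Mode constants
# KS_MODE_32             - 32-bit
# KS_MODE_64             - 64-bit
# KS_MODE_ARM            - ARM mode
# KS_MODE_THUMB          - Thumb mode
# KS_MODE_MIPS32         - MIPS 32-bit (used in the MIPS example below)
# KS_MODE_PPC64          - PowerPC 64-bit (used in the PowerPC example below)
# KS_MODE_LITTLE_ENDIAN  - Little endian (default)
# KS_MODE_BIG_ENDIAN     - Big endian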

# MIPS 32-bit little-endian
ks_mips = Ks(KS_ARCH_MIPS, KS_MODE_MIPS32 + KS_MODE_LITTLE_ENDIAN)
encoding, count = ks_mips.asm("addiu $t0, $zero, 5")
print(f"MIPS: {bytes(encoding).hex()}")

# PowerPC 64-bit big-endian
ks_ppc = Ks(KS_ARCH_PPC, KS_MODE_PPC64 + KS_MODE_BIG_ENDIAN)
encoding, count = ks_ppc.asm("li 3, 1; li 0, 1; sc")
print(f"PPC64: {bytes(encoding).hex()}")

Symbole auflösen

from keystone import *

ks = Ks(KS_ARCH_X86, KS_MODE_64)

# Assemble with labels
code = """
    start:
        xor rax, rax
        test rdi, rdi
        jz done
        mov rax, rdi
        add rax, rsi
    done:
        ret
"""
encoding, count = ks.asm(code)
print(f"Bytes: {bytes(encoding).hex()}")
print(f"Instructions: {count}")

Custom Symbol Resolver

from keystone import *

# Define a symbol resolver callback
def sym_resolver(symbol, value):
    """Resolve an external symbol name to an absolute address.

    Keystone calls this for symbols it cannot resolve on its own. The
    callback reports the address through the ``value`` out-parameter and
    signals success through a plain boolean return; returning a tuple does
    not hand the address back to the engine.

    Args:
        symbol: Symbol name as ``bytes`` (e.g. ``b"printf"``).
        value: Writable one-element out-parameter; the resolved address is
            stored in ``value[0]``.

    Returns:
        True if the symbol is known and ``value[0]`` was written,
        False otherwise (``value`` is left untouched).
    """
    # NOTE(review): these addresses are far more than 2 GiB away from a low
    # load address such as 0x400000, so a rel32 `call` may not be able to
    # reach them -- confirm the example call site encodes as intended.
    symbols = {
        b"printf": 0x7fff00001000,
        b"malloc": 0x7fff00002000,
        b"free":   0x7fff00003000,
    }
    address = symbols.get(symbol)
    if address is None:
        return False
    value[0] = address
    return True

ks = Ks(KS_ARCH_X86, KS_MODE_64)
ks.sym_resolver = sym_resolver

# Assemble code referencing external symbols
encoding, count = ks.asm("call printf", addr=0x400000)
print(f"Call printf: {bytes(encoding).hex()}")

Assemblierung an bestimmter Adresse

from keystone import *

ks = Ks(KS_ARCH_X86, KS_MODE_64)

# Specify the address for relative instruction encoding
# This matters for relative jumps and calls
encoding, count = ks.asm("jmp 0x401000", addr=0x400000)
print(f"Jump from 0x400000 to 0x401000: {bytes(encoding).hex()}")

encoding, count = ks.asm("call 0x401000", addr=0x400000)
print(f"Call from 0x400000 to 0x401000: {bytes(encoding).hex()}")

Syntax-Optionen

from keystone import *

ks = Ks(KS_ARCH_X86, KS_MODE_64)

# Intel syntax (default)
ks.syntax = KS_OPT_SYNTAX_INTEL
encoding, _ = ks.asm("mov rax, [rbp-0x10]")
print(f"Intel: {bytes(encoding).hex()}")

# AT&T syntax
ks.syntax = KS_OPT_SYNTAX_ATT
encoding, _ = ks.asm("movq -0x10(%rbp), %rax")
print(f"AT&T: {bytes(encoding).hex()}")

# NASM syntax
ks.syntax = KS_OPT_SYNTAX_NASM
encoding, _ = ks.asm("mov rax, [rbp-0x10]")
print(f"NASM: {bytes(encoding).hex()}")

Fehlerbehandlung

from keystone import *

ks = Ks(KS_ARCH_X86, KS_MODE_64)

try:
    encoding, count = ks.asm("invalid_instruction rax, rbx")
except KsError as e:
    print(f"Assembly error: {e}")
    print(f"Error code: {e.errno}")

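# Check assembly count for partial failures
# NOTE(review): as the try/except above shows, ks.asm() raises KsError on
# failure, so a short count may never be observed here -- confirm. The number
# of statements assembled before the error is presumably available from the
# exception (KsError.get_asm_count()).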
code = "nop; nop; nop"
encoding, count = ks.asm(code)
if count < 3:
    print(f"Warning: only {count} of 3 instructions assembled")

C-API

#include <keystone/keystone.h>
#include <stdio.h>

int main(void) {
    ks_engine *ks;
    unsigned char *encode;
    size_t size, count;

    // Initialize Keystone for x86-64
    if (ks_open(KS_ARCH_X86, KS_MODE_64, &ks) != KS_ERR_OK) {
        printf("Failed to initialize Keystone\n");
        return -1;
    }

    // Assemble instructions
    if (ks_asm(ks, "mov rax, rbx; ret", 0, &encode, &size, &count) != KS_ERR_OK) {
        printf("Assembly error: %s\n", ks_strerror(ks_errno(ks)));
    } else {
        printf("Assembled %zu instructions, %zu bytes: ", count, size);
        for (size_t i = 0; i < size; i++) {
            printf("%02x ", encode[i]);
        }
        printf("\n");
        ks_free(encode);
    }

    ks_close(ks);
    return 0;
}
# Compile
gcc asm.c -o asm -lkeystone

Integration: Assemble, Emulate, Disassemble

from keystone import Ks, KS_ARCH_X86, KS_MODE_64
from capstone import Cs, CS_ARCH_X86, CS_MODE_64
from unicorn import Uc, UC_ARCH_X86, UC_MODE_64
from unicorn.x86_const import UC_X86_REG_RAX, UC_X86_REG_RDI, UC_X86_REG_RSI

# Step 1: Assemble
ks = Ks(KS_ARCH_X86, KS_MODE_64)
code, _ = ks.asm("mov rax, rdi; add rax, rsi; ret")
shellcode = bytes(code)

# Step 2: Disassemble to verify
# 0x1000 is the address reported for the first instruction; it matches the
# address the code is written to in step 3.
cs = Cs(CS_ARCH_X86, CS_MODE_64)
for insn in cs.disasm(shellcode, 0x1000):
    print(f"0x{insn.address:x}: {insn.mnemonic} {insn.op_str}")

# Step 3: Emulate
mu = Uc(UC_ARCH_X86, UC_MODE_64)
mu.mem_map(0x1000, 0x1000)
mu.mem_write(0x1000, shellcode)
mu.reg_write(UC_X86_REG_RDI, 40)
mu.reg_write(UC_X86_REG_RSI, 2)
# The end address is the last byte of the code, i.e. the trailing `ret`.
# Presumably this makes emulation stop before `ret` executes, since no stack
# is mapped here -- confirm against Unicorn's emu_start semantics.
mu.emu_start(0x1000, 0x1000 + len(shellcode) - 1)
print(f"Result: {mu.reg_read(UC_X86_REG_RAX)}")  # 42

Shellcode erzeugen

from keystone import *

ks = Ks(KS_ARCH_X86, KS_MODE_64)

# Linux x86-64 execve("/bin//sh", NULL, NULL) shellcode.
# The immediate 0x68732f2f6e69622f is the little-endian encoding of the
# 8-byte string "/bin//sh"; the doubled slash pads the path to a full
# quadword, and the preceding `push rsi` supplies the NUL terminator.
# RAX is cleared before `mov al, 59`: writing AL only sets the low byte, so
# without the clear the syscall number depends on whatever RAX held on entry.
shellcode_asm = """
    xor rsi, rsi
    push rsi
    mov rdi, 0x68732f2f6e69622f
    push rdi
    mov rdi, rsp
    xor rdx, rdx
    xor eax, eax
    mov al, 59
    syscall
"""

encoding, count = ks.asm(shellcode_asm)
sc = bytes(encoding)

# Print as C array
print("unsigned char shellcode[] = {")
print("    " + ", ".join(f"0x{b:02x}" for b in sc))
print("};")
print(f"// Length: {len(sc)} bytes")