Keystone Engine
Keystone ist eine leichtgewichtige Multi-Architektur-Assembler-Engine. Sie bietet eine einfache API zum Assemblieren von Instruktionen für x86, ARM, AArch64, MIPS, PowerPC, SPARC und weitere Architekturen und wird häufig zusammen mit Capstone (Disassembler) und Unicorn (Emulator) in Binäranalyse-Workflows eingesetzt.
Installation
# Python binding
pip install keystone-engine
# From source
git clone https://github.com/keystone-engine/keystone.git
cd keystone
mkdir build && cd build
cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON ..
make -j$(nproc)
sudo make install
# Update linker cache
sudo ldconfig
# Install Python binding from source
cd ../bindings/python
pip install .
# macOS
brew install keystone
pip install keystone-engine
Python API: Basic Assembly
x86-64 Assembly
from keystone import *
# Create assembler for x86-64
ks = Ks(KS_ARCH_X86, KS_MODE_64)
# Assemble a single instruction
encoding, count = ks.asm("mov rax, rbx")
print(f"Bytes: {bytes(encoding).hex()}")
print(f"Instructions assembled: {count}")
# Assemble multiple instructions (semicolon-separated)
encoding, count = ks.asm("push rbp; mov rbp, rsp; sub rsp, 0x20; pop rbp; ret")
print(f"Bytes: {bytes(encoding).hex()}")
print(f"Instructions: {count}")
x86 32-bit
from keystone import *
ks = Ks(KS_ARCH_X86, KS_MODE_32)
encoding, count = ks.asm("push ebp; mov ebp, esp; xor eax, eax; pop ebp; ret")
print(f"Shellcode: {bytes(encoding).hex()}")
ARM Assembly
from keystone import *
# ARM 32-bit
ks_arm = Ks(KS_ARCH_ARM, KS_MODE_ARM)
encoding, count = ks_arm.asm("mov r0, #5; mov r1, #10; add r2, r0, r1; bx lr")
print(f"ARM: {bytes(encoding).hex()}")
# ARM Thumb mode
ks_thumb = Ks(KS_ARCH_ARM, KS_MODE_THUMB)
encoding, count = ks_thumb.asm("movs r0, #5; movs r1, #10; adds r2, r0, r1")
print(f"Thumb: {bytes(encoding).hex()}")
# AArch64
ks_arm64 = Ks(KS_ARCH_ARM64, KS_MODE_LITTLE_ENDIAN)
encoding, count = ks_arm64.asm("mov x0, #5; mov x1, #10; add x2, x0, x1; ret")
print(f"AArch64: {bytes(encoding).hex()}")
Architektur-Unterstützung
from keystone import *
# Architecture and mode constants
# KS_ARCH_X86 - x86/x86-64
# KS_ARCH_ARM - ARM (32-bit)
# KS_ARCH_ARM64 - AArch64
# KS_ARCH_MIPS - MIPS
# KS_ARCH_PPC - PowerPC
# KS_ARCH_SPARC - SPARC
# KS_ARCH_SYSTEMZ - SystemZ
# KS_ARCH_HEXAGON - Hexagon
# KS_ARCH_EVM - Ethereum VM
# Mode constants
# KS_MODE_32 - 32-bit
# KS_MODE_64 - 64-bit
# KS_MODE_ARM - ARM mode
# KS_MODE_THUMB - Thumb mode
# KS_MODE_LITTLE_ENDIAN - Little endian (default)
# KS_MODE_BIG_ENDIAN - Big endian
# MIPS 32-bit little-endian
ks_mips = Ks(KS_ARCH_MIPS, KS_MODE_MIPS32 + KS_MODE_LITTLE_ENDIAN)
encoding, count = ks_mips.asm("addiu $t0, $zero, 5")
print(f"MIPS: {bytes(encoding).hex()}")
# PowerPC 64-bit big-endian
ks_ppc = Ks(KS_ARCH_PPC, KS_MODE_PPC64 + KS_MODE_BIG_ENDIAN)
encoding, count = ks_ppc.asm("li 3, 1; li 0, 1; sc")
print(f"PPC64: {bytes(encoding).hex()}")
Symbole auflösen
from keystone import *
ks = Ks(KS_ARCH_X86, KS_MODE_64)
# Assemble with labels
code = """
start:
xor rax, rax
test rdi, rdi
jz done
mov rax, rdi
add rax, rsi
done:
ret
"""
encoding, count = ks.asm(code)
print(f"Bytes: {bytes(encoding).hex()}")
print(f"Instructions: {count}")
Custom Symbol Resolver
from keystone import *
# Define a symbol resolver callback
def sym_resolver(symbol, value):
    """Resolve external symbols to addresses for Keystone.

    Keystone calls this for symbols it cannot resolve on its own.

    Args:
        symbol: Name of the unresolved symbol, as ``bytes``.
        value: Writable out-parameter; the resolved address is stored in
            ``value[0]`` (a ``uint64`` pointer when invoked by Keystone).

    Returns:
        ``True`` if the symbol is known and ``value[0]`` was set,
        ``False`` otherwise.
    """
    symbols = {
        b"printf": 0x7fff00001000,
        b"malloc": 0x7fff00002000,
        b"free": 0x7fff00003000,
    }
    address = symbols.get(symbol)
    if address is None:
        return False
    # The callback's return value is only a success flag; the address must
    # be written through the out-parameter, not returned next to the flag.
    value[0] = address
    return True
ks = Ks(KS_ARCH_X86, KS_MODE_64)
ks.sym_resolver = sym_resolver
# Assemble code referencing external symbols
encoding, count = ks.asm("call printf", addr=0x400000)
print(f"Call printf: {bytes(encoding).hex()}")
Assemblierung an bestimmter Adresse
from keystone import *
ks = Ks(KS_ARCH_X86, KS_MODE_64)
# Specify the address for relative instruction encoding
# This matters for relative jumps and calls
encoding, count = ks.asm("jmp 0x401000", addr=0x400000)
print(f"Jump from 0x400000 to 0x401000: {bytes(encoding).hex()}")
encoding, count = ks.asm("call 0x401000", addr=0x400000)
print(f"Call from 0x400000 to 0x401000: {bytes(encoding).hex()}")
Syntax-Optionen
from keystone import *
ks = Ks(KS_ARCH_X86, KS_MODE_64)
# Intel syntax (default)
ks.syntax = KS_OPT_SYNTAX_INTEL
encoding, _ = ks.asm("mov rax, [rbp-0x10]")
print(f"Intel: {bytes(encoding).hex()}")
# AT&T syntax
ks.syntax = KS_OPT_SYNTAX_ATT
encoding, _ = ks.asm("movq -0x10(%rbp), %rax")
print(f"AT&T: {bytes(encoding).hex()}")
# NASM syntax
ks.syntax = KS_OPT_SYNTAX_NASM
encoding, _ = ks.asm("mov rax, [rbp-0x10]")
print(f"NASM: {bytes(encoding).hex()}")
Fehlerbehandlung
from keystone import *
ks = Ks(KS_ARCH_X86, KS_MODE_64)
try:
encoding, count = ks.asm("invalid_instruction rax, rbx")
except KsError as e:
print(f"Assembly error: {e}")
print(f"Error code: {e.errno}")
# Check assembly count for partial failures
code = "nop; nop; nop"
encoding, count = ks.asm(code)
if count < 3:
print(f"Warning: only {count} of 3 instructions assembled")
C-API
#include <keystone/keystone.h>
#include <stdio.h>
/*
 * Assemble "mov rax, rbx; ret" for x86-64 and print the encoding as hex.
 *
 * Returns 0 on success, or -1 when the engine cannot be opened or the
 * assembly fails, so callers can detect the failure from the exit status.
 */
int main(void) {
    ks_engine *ks;
    unsigned char *encode;
    size_t size, count;
    int status = 0;

    // Initialize Keystone for x86-64
    if (ks_open(KS_ARCH_X86, KS_MODE_64, &ks) != KS_ERR_OK) {
        printf("Failed to initialize Keystone\n");
        return -1;
    }

    // Assemble instructions
    if (ks_asm(ks, "mov rax, rbx; ret", 0, &encode, &size, &count) != KS_ERR_OK) {
        printf("Assembly error: %s\n", ks_strerror(ks_errno(ks)));
        // Report the failure instead of exiting with success.
        status = -1;
    } else {
        printf("Assembled %zu instructions, %zu bytes: ", count, size);
        for (size_t i = 0; i < size; i++) {
            printf("%02x ", encode[i]);
        }
        printf("\n");
        // The encoding buffer is allocated by ks_asm and released here.
        ks_free(encode);
    }

    ks_close(ks);
    return status;
}
# Compile
gcc asm.c -o asm -lkeystone
Integration: Assemble, Emulate, Disassemble
from keystone import Ks, KS_ARCH_X86, KS_MODE_64
from capstone import Cs, CS_ARCH_X86, CS_MODE_64
from unicorn import Uc, UC_ARCH_X86, UC_MODE_64
from unicorn.x86_const import UC_X86_REG_RAX, UC_X86_REG_RDI, UC_X86_REG_RSI
# Step 1: Assemble
ks = Ks(KS_ARCH_X86, KS_MODE_64)
code, _ = ks.asm("mov rax, rdi; add rax, rsi; ret")
shellcode = bytes(code)
# Step 2: Disassemble to verify
cs = Cs(CS_ARCH_X86, CS_MODE_64)
for insn in cs.disasm(shellcode, 0x1000):
print(f"0x{insn.address:x}: {insn.mnemonic} {insn.op_str}")
# Step 3: Emulate
mu = Uc(UC_ARCH_X86, UC_MODE_64)
mu.mem_map(0x1000, 0x1000)
mu.mem_write(0x1000, shellcode)
mu.reg_write(UC_X86_REG_RDI, 40)
mu.reg_write(UC_X86_REG_RSI, 2)
mu.emu_start(0x1000, 0x1000 + len(shellcode) - 1)
print(f"Result: {mu.reg_read(UC_X86_REG_RAX)}") # 42
Shellcode erzeugen
from keystone import *
ks = Ks(KS_ARCH_X86, KS_MODE_64)
# Linux x86-64 execve("/bin//sh", NULL, NULL) shellcode.
# rax is cleared before the syscall number is loaded: `mov al, 59` writes
# only the low byte, so leftover upper bits in rax would otherwise select
# a different syscall.
shellcode_asm = """
xor rsi, rsi
push rsi
mov rdi, 0x68732f2f6e69622f
push rdi
mov rdi, rsp
xor rdx, rdx
xor eax, eax
mov al, 59
syscall
"""
encoding, count = ks.asm(shellcode_asm)
sc = bytes(encoding)
# Print as C array
print("unsigned char shellcode[] = {")
print(" " + ", ".join(f"0x{b:02x}" for b in sc))
print("};")
print(f"// Length: {len(sc)} bytes")