Keystone Engine
Keystoneは軽量なマルチアーキテクチャ対応のアセンブラエンジンです。x86、ARM、AArch64、MIPS、PowerPC、SPARCなどのアーキテクチャの命令をアセンブルするためのシンプルなAPIを提供し、バイナリ解析のワークフローではCapstone(逆アセンブラ)やUnicorn(エミュレータ)と組み合わせて使われます。
インストール
# Option 1: install the Python binding with pip
pip install keystone-engine
# Option 2: build and install the core library from source
git clone https://github.com/keystone-engine/keystone.git
cd keystone
# Out-of-tree build: keep generated files in a separate build/ directory
mkdir build && cd build
# Release configuration, building Keystone as a shared library
cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON ..
# Parallel build, one job per CPU reported by nproc
make -j$(nproc)
sudo make install
# Update linker cache so the newly installed shared library can be found
sudo ldconfig
# Install Python binding from source (path is relative to the build/ directory)
cd ../bindings/python
pip install .
# Option 3 (macOS): Homebrew for the library, pip for the Python binding
brew install keystone
pip install keystone-engine
Python API: Basic Assembly
x86-64 Assembly
from keystone import *
# One assembler instance targeting x86-64, reused for both calls below
ks = Ks(KS_ARCH_X86, KS_MODE_64)

# A single instruction: asm() returns the encoding and a statement count
single_insn = "mov rax, rbx"
encoding, count = ks.asm(single_insn)
print(f"Bytes: {bytes(encoding).hex()}")
print(f"Instructions assembled: {count}")

# Several instructions can share one string when separated by semicolons
stack_frame = "push rbp; mov rbp, rsp; sub rsp, 0x20; pop rbp; ret"
encoding, count = ks.asm(stack_frame)
print(f"Bytes: {bytes(encoding).hex()}")
print(f"Instructions: {count}")
x86 32-bit
from keystone import *
# Same architecture constant as x86-64; only the mode selects 32-bit encoding
ks = Ks(KS_ARCH_X86, KS_MODE_32)
stub_src = "push ebp; mov ebp, esp; xor eax, eax; pop ebp; ret"
encoding, count = ks.asm(stub_src)
print(f"Shellcode: {bytes(encoding).hex()}")
ARM Assembly
from keystone import *
# 32-bit ARM, ARM instruction set
arm_src = "mov r0, #5; mov r1, #10; add r2, r0, r1; bx lr"
ks_arm = Ks(KS_ARCH_ARM, KS_MODE_ARM)
encoding, count = ks_arm.asm(arm_src)
print(f"ARM: {bytes(encoding).hex()}")

# 32-bit ARM, Thumb instruction set (same architecture, different mode)
thumb_src = "movs r0, #5; movs r1, #10; adds r2, r0, r1"
ks_thumb = Ks(KS_ARCH_ARM, KS_MODE_THUMB)
encoding, count = ks_thumb.asm(thumb_src)
print(f"Thumb: {bytes(encoding).hex()}")

# AArch64 has its own architecture constant; the mode only picks endianness
arm64_src = "mov x0, #5; mov x1, #10; add x2, x0, x1; ret"
ks_arm64 = Ks(KS_ARCH_ARM64, KS_MODE_LITTLE_ENDIAN)
encoding, count = ks_arm64.asm(arm64_src)
print(f"AArch64: {bytes(encoding).hex()}")
Architecture Support
from keystone import *
# Architecture constants (first argument to Ks)
#   KS_ARCH_X86     - x86/x86-64
#   KS_ARCH_ARM     - ARM (32-bit)
#   KS_ARCH_ARM64   - AArch64
#   KS_ARCH_MIPS    - MIPS
#   KS_ARCH_PPC     - PowerPC
#   KS_ARCH_SPARC   - SPARC
#   KS_ARCH_SYSTEMZ - SystemZ
#   KS_ARCH_HEXAGON - Hexagon
#   KS_ARCH_EVM     - Ethereum VM
#
# Mode constants (second argument to Ks; combined by addition below)
#   KS_MODE_32            - 32-bit
#   KS_MODE_64            - 64-bit
#   KS_MODE_ARM           - ARM mode
#   KS_MODE_THUMB         - Thumb mode
#   KS_MODE_MIPS32        - MIPS 32-bit (used below)
#   KS_MODE_PPC64         - PowerPC 64-bit (used below)
#   KS_MODE_LITTLE_ENDIAN - Little endian (default)
#   KS_MODE_BIG_ENDIAN    - Big endian

# MIPS 32-bit little-endian
mips_mode = KS_MODE_MIPS32 + KS_MODE_LITTLE_ENDIAN
ks_mips = Ks(KS_ARCH_MIPS, mips_mode)
mips_src = "addiu $t0, $zero, 5"
encoding, count = ks_mips.asm(mips_src)
print(f"MIPS: {bytes(encoding).hex()}")

# PowerPC 64-bit big-endian
ppc_mode = KS_MODE_PPC64 + KS_MODE_BIG_ENDIAN
ks_ppc = Ks(KS_ARCH_PPC, ppc_mode)
ppc_src = "li 3, 1; li 0, 1; sc"
encoding, count = ks_ppc.asm(ppc_src)
print(f"PPC64: {bytes(encoding).hex()}")
Resolving Symbols
from keystone import *
# Multi-line source with local labels; the forward reference `jz done`
# is resolved within the same asm() call.
labeled_src = """
start:
xor rax, rax
test rdi, rdi
jz done
mov rax, rdi
add rax, rsi
done:
ret
"""
ks = Ks(KS_ARCH_X86, KS_MODE_64)
encoding, count = ks.asm(labeled_src)
print(f"Bytes: {bytes(encoding).hex()}")
print(f"Instructions: {count}")
Custom Symbol Resolver
from keystone import *
# Define a symbol resolver callback
def sym_resolver(symbol, value):
    """Resolve an external symbol name to an address for Keystone.

    Keystone calls this for symbols that are missing from the assembly
    source. The callback follows a C-style out-parameter contract: the
    address is written through ``value`` and the return value is only a
    boolean. Returning a tuple such as ``(True, addr)`` never stores the
    address and is always truthy, so it must not be used.

    Args:
        symbol: Symbol name as ``bytes`` (e.g. ``b"printf"``).
        value: Writable one-element buffer; ``value[0]`` receives the address.

    Returns:
        True if ``symbol`` was resolved and ``value[0]`` was set, else False.
    """
    # NOTE(review): these sample addresses lie far more than 2 GiB away from a
    # low load address such as 0x400000, which a rel32 `call` cannot span --
    # confirm they suit the intended call site.
    symbols = {
        b"printf": 0x7fff00001000,
        b"malloc": 0x7fff00002000,
        b"free": 0x7fff00003000,
    }
    address = symbols.get(symbol)
    if address is None:
        # Not ours: leave the out-parameter untouched and report failure
        return False
    value[0] = address
    return True
# Attach the resolver to a fresh x86-64 assembler
ks = Ks(KS_ARCH_X86, KS_MODE_64)
ks.sym_resolver = sym_resolver
# `printf` is not defined in the assembly text, so it is an external symbol here
load_addr = 0x400000
encoding, count = ks.asm("call printf", addr=load_addr)
print(f"Call printf: {bytes(encoding).hex()}")
Assembly at Specific Address
from keystone import *
ks = Ks(KS_ARCH_X86, KS_MODE_64)
# Relative jumps and calls encode a displacement, so the assembler has to be
# told where the instruction will live.
origin = 0x400000

jmp_src = "jmp 0x401000"
encoding, count = ks.asm(jmp_src, addr=origin)
print(f"Jump from 0x400000 to 0x401000: {bytes(encoding).hex()}")

call_src = "call 0x401000"
encoding, count = ks.asm(call_src, addr=origin)
print(f"Call from 0x400000 to 0x401000: {bytes(encoding).hex()}")
Syntax Options
from keystone import *
# One engine; the input syntax is switched between calls via the `syntax` option
ks = Ks(KS_ARCH_X86, KS_MODE_64)

# Intel syntax (default)
intel_src = "mov rax, [rbp-0x10]"
ks.syntax = KS_OPT_SYNTAX_INTEL
encoding, _ = ks.asm(intel_src)
print(f"Intel: {bytes(encoding).hex()}")

# AT&T syntax: the same load written with AT&T operand order and % registers
att_src = "movq -0x10(%rbp), %rax"
ks.syntax = KS_OPT_SYNTAX_ATT
encoding, _ = ks.asm(att_src)
print(f"AT&T: {bytes(encoding).hex()}")

# NASM syntax
nasm_src = "mov rax, [rbp-0x10]"
ks.syntax = KS_OPT_SYNTAX_NASM
encoding, _ = ks.asm(nasm_src)
print(f"NASM: {bytes(encoding).hex()}")
Error Handling
from keystone import *
ks = Ks(KS_ARCH_X86, KS_MODE_64)

# In the Python binding a failed assembly raises KsError rather than
# returning a short count, so failures are handled here.
try:
    encoding, count = ks.asm("invalid_instruction rax, rbx")
except KsError as e:
    print(f"Assembly error: {e}")
    print(f"Error code: {e.errno}")
    # The exception also carries how many statements assembled before the failure
    print(f"Statements assembled before failure: {e.get_asm_count()}")

# Sanity-check the statement count of a successful call. This is not how
# failures surface (they raise above); it only confirms nothing was skipped.
code = "nop; nop; nop"
encoding, count = ks.asm(code)
if count < 3:
    print(f"Warning: only {count} of 3 instructions assembled")
C API
#include <keystone/keystone.h>
#include <stdio.h>
int main(void) {
ks_engine *ks;
unsigned char *encode;
size_t size, count;
// Initialize Keystone for x86-64
if (ks_open(KS_ARCH_X86, KS_MODE_64, &ks) != KS_ERR_OK) {
printf("Failed to initialize Keystone\n");
return -1;
}
// Assemble instructions
if (ks_asm(ks, "mov rax, rbx; ret", 0, &encode, &size, &count) != KS_ERR_OK) {
printf("Assembly error: %s\n", ks_strerror(ks_errno(ks)));
} else {
printf("Assembled %zu instructions, %zu bytes: ", count, size);
for (size_t i = 0; i < size; i++) {
printf("%02x ", encode[i]);
}
printf("\n");
ks_free(encode);
}
ks_close(ks);
return 0;
}
# Compile the example above (assumed to be saved as asm.c) and link it against libkeystone
gcc asm.c -o asm -lkeystone
Integration: Assemble, Emulate, Disassemble
from keystone import Ks, KS_ARCH_X86, KS_MODE_64
from capstone import Cs, CS_ARCH_X86, CS_MODE_64
from unicorn import Uc, UC_ARCH_X86, UC_MODE_64
from unicorn.x86_const import UC_X86_REG_RAX, UC_X86_REG_RDI, UC_X86_REG_RSI
# Load address shared by the disassembler and the emulator, plus the size of
# the single region mapped for the code.
base = 0x1000
region_size = 0x1000

# Step 1: Assemble
asm_src = "mov rax, rdi; add rax, rsi; ret"
ks = Ks(KS_ARCH_X86, KS_MODE_64)
code, _ = ks.asm(asm_src)
shellcode = bytes(code)

# Step 2: Disassemble to verify
cs = Cs(CS_ARCH_X86, CS_MODE_64)
for insn in cs.disasm(shellcode, base):
    print(f"0x{insn.address:x}: {insn.mnemonic} {insn.op_str}")

# Step 3: Emulate
mu = Uc(UC_ARCH_X86, UC_MODE_64)
mu.mem_map(base, region_size)
mu.mem_write(base, shellcode)
for reg, operand in ((UC_X86_REG_RDI, 40), (UC_X86_REG_RSI, 2)):
    mu.reg_write(reg, operand)
# Stop at the last byte of the code, i.e. the trailing one-byte `ret`, so it is
# never executed: no stack has been mapped for it to pop a return address from.
stop_at = base + len(shellcode) - 1
mu.emu_start(base, stop_at)
print(f"Result: {mu.reg_read(UC_X86_REG_RAX)}") # 42
Generating Shellcode
from keystone import *
ks = Ks(KS_ARCH_X86, KS_MODE_64)
# Linux x86-64 execve("/bin//sh", NULL, NULL) shellcode.
# - The immediate 0x68732f2f6e69622f is the 8 bytes "/bin//sh" in little-endian
#   order; the zeroed RSI pushed before it serves as the string terminator.
# - RAX must hold the full syscall number. `mov al, 59` alone writes only the
#   low byte, so EAX is cleared first (a 32-bit write zeroes the upper half of RAX).
shellcode_asm = """
xor rsi, rsi
push rsi
mov rdi, 0x68732f2f6e69622f
push rdi
mov rdi, rsp
xor rdx, rdx
xor eax, eax
mov al, 59
syscall
"""
encoding, count = ks.asm(shellcode_asm)
sc = bytes(encoding)
# Print as C array
print("unsigned char shellcode[] = {")
print(" " + ", ".join(f"0x{b:02x}" for b in sc))
print("};")
print(f"// Length: {len(sc)} bytes")