Unicorn Engine
Unicorn Engine هو إطار عمل خفيف لمحاكاة المعالج متعدد المعماريات مبني على QEMU. يتيح لك محاكاة كود الآلة عبر x86 و ARM و AArch64 و MIPS و SPARC ومعماريات أخرى مع تحكم دقيق من خلال تعيين الذاكرة والوصول إلى السجلات وخطافات التتبع والقياس (instrumentation hooks).
التثبيت
# Option 1: install the Python binding with pip
pip install unicorn
# Option 2: build and install the core library from source
git clone https://github.com/unicorn-engine/unicorn.git
cd unicorn
# Out-of-tree CMake build inside ./build
mkdir build && cd build
cmake ..
# One make job per processing unit reported by nproc
make -j$(nproc)
sudo make install
# Install the Python binding from the same source checkout
# (run from the build directory created above)
cd ../bindings/python
pip install .
# Option 3 (macOS): Homebrew package, then the Python binding
brew install unicorn
pip install unicorn
Python API: Basic Emulation
x86-64 Emulation
from unicorn import *
from unicorn.x86_const import *
# Machine code: inc ecx; dec edx; add ecx, edx; ret
X86_CODE = b"\xff\xc1\xff\xca\x01\xd1\xc3"
# Memory address for code
ADDRESS = 0x1000000

# Initialize emulator for x86-64
mu = Uc(UC_ARCH_X86, UC_MODE_64)
# Map 2MB of memory for code
mu.mem_map(ADDRESS, 2 * 1024 * 1024)
# Write code to memory
mu.mem_write(ADDRESS, X86_CODE)

# Set initial register values
mu.reg_write(UC_X86_REG_ECX, 10)
mu.reg_write(UC_X86_REG_EDX, 5)

# Emulate code. The end address is the address of the final ret, so
# emulation stops before ret runs (no stack has been set up for it).
mu.emu_start(ADDRESS, ADDRESS + len(X86_CODE) - 1)

# Read results.
# Trace: inc ecx -> 11; dec edx -> 4; add ecx, edx -> 11 + 4 = 15
ecx = mu.reg_read(UC_X86_REG_ECX)
edx = mu.reg_read(UC_X86_REG_EDX)
print(f"ECX = {ecx}, EDX = {edx}")  # ECX = 15, EDX = 4
ARM Emulation
from unicorn import *
from unicorn.arm_const import *
# ARM code: mov r0, #5; mov r1, #10; add r2, r0, r1
ARM_CODE = b"\x05\x00\xa0\xe3\x0a\x10\xa0\xe3\x01\x20\x80\xe0"
# Load address of the code; the mapping below starts here as well
ARM_BASE = 0x10000

mu = Uc(UC_ARCH_ARM, UC_MODE_ARM)
mu.mem_map(ARM_BASE, 2 * 1024 * 1024)
mu.mem_write(ARM_BASE, ARM_CODE)

# Run from the first instruction up to the address just past the last one
mu.emu_start(ARM_BASE, ARM_BASE + len(ARM_CODE))

# Read back the three registers touched by the code, in order
r0, r1, r2 = (
    mu.reg_read(reg)
    for reg in (UC_ARM_REG_R0, UC_ARM_REG_R1, UC_ARM_REG_R2)
)
print(f"R0={r0}, R1={r1}, R2={r2}")  # R0=5, R1=10, R2=15
إعداد المعمارية
from unicorn import *
# Each Uc instance is created from an (architecture, mode) pair.

# x86: 32-bit and 64-bit
mu_x86 = Uc(UC_ARCH_X86, UC_MODE_32)
mu_x64 = Uc(UC_ARCH_X86, UC_MODE_64)

# ARM 32-bit: ARM and Thumb instruction sets
mu_arm = Uc(UC_ARCH_ARM, UC_MODE_ARM)
mu_thumb = Uc(UC_ARCH_ARM, UC_MODE_THUMB)

# AArch64
mu_arm64 = Uc(UC_ARCH_ARM64, UC_MODE_ARM)

# MIPS 32-bit: the endianness flag is OR-ed into the mode
mu_mips = Uc(UC_ARCH_MIPS, UC_MODE_MIPS32 | UC_MODE_BIG_ENDIAN)
mu_mipsel = Uc(UC_ARCH_MIPS, UC_MODE_MIPS32 | UC_MODE_LITTLE_ENDIAN)

# RISC-V 64-bit
mu_riscv = Uc(UC_ARCH_RISCV, UC_MODE_RISCV64)
Memory Mapping
from unicorn import *
from unicorn.x86_const import *
mu = Uc(UC_ARCH_X86, UC_MODE_64)

# Map memory regions with permissions
CODE_ADDR = 0x400000
STACK_ADDR = 0x7fff0000
DATA_ADDR = 0x600000

# Code: read + execute
mu.mem_map(CODE_ADDR, 0x1000, UC_PROT_READ | UC_PROT_EXEC)
# Stack: read + write
mu.mem_map(STACK_ADDR, 0x10000, UC_PROT_READ | UC_PROT_WRITE)
# Data: read + write
mu.mem_map(DATA_ADDR, 0x1000, UC_PROT_READ | UC_PROT_WRITE)

# Write data to memory
mu.mem_write(DATA_ADDR, b"Hello, Unicorn!\x00")

# Read data from memory. mem_read returns a bytearray, so convert it to
# bytes before printing to get the plain b'...' representation.
data = mu.mem_read(DATA_ADDR, 16)
print(bytes(data))  # b'Hello, Unicorn!\x00'

# Set up stack pointer in the middle of the mapped stack region
mu.reg_write(UC_X86_REG_RSP, STACK_ADDR + 0x8000)

# Unmap memory when done
mu.mem_unmap(DATA_ADDR, 0x1000)
Register Access
from unicorn import *
from unicorn.x86_const import *
mu = Uc(UC_ARCH_X86, UC_MODE_64)
mu.mem_map(0x1000, 0x1000)

# Write general purpose registers (applied in the listed order)
initial_regs = [
    (UC_X86_REG_RAX, 0xdeadbeef),
    (UC_X86_REG_RBX, 0xcafebabe),
    (UC_X86_REG_RCX, 100),
    (UC_X86_REG_RDX, 200),
    (UC_X86_REG_RSI, 0x600000),
    (UC_X86_REG_RDI, 0x700000),
    (UC_X86_REG_RSP, 0x7fff8000),
    (UC_X86_REG_RBP, 0x7fff8000),
    (UC_X86_REG_RIP, 0x1000),
]
for reg_id, reg_value in initial_regs:
    mu.reg_write(reg_id, reg_value)

# Read registers
rax = mu.reg_read(UC_X86_REG_RAX)
rflags = mu.reg_read(UC_X86_REG_EFLAGS)
print(f"RAX = 0x{rax:016x}")
print(f"RFLAGS = 0x{rflags:08x}")

# Write the FS segment base, then read it back
mu.reg_write(UC_X86_REG_FS_BASE, 0x600000)
fs_base = mu.reg_read(UC_X86_REG_FS_BASE)
Hooks
Code Hooks (Tracing)
from unicorn import *
from unicorn.x86_const import *
from capstone import *
# Capstone disassembler for x86-64, shared by every hook_code invocation
md = Cs(CS_ARCH_X86, CS_MODE_64)


def hook_code(uc, address, size, user_data):
    """Disassemble and print the instruction bytes at ``address``.

    Intended as a UC_HOOK_CODE callback.

    Args:
        uc: Emulator instance; used to read the instruction bytes.
        address: Address of the instruction bytes to read.
        size: Number of bytes to read at ``address``.
        user_data: Unused.
    """
    code = uc.mem_read(address, size)
    # mem_read's result is converted to bytes before being disassembled
    for insn in md.disasm(bytes(code), address):
        print(f">>> 0x{insn.address:x}: {insn.mnemonic} {insn.op_str}")
mu = Uc(UC_ARCH_X86, UC_MODE_64)
mu.mem_map(0x1000, 0x1000)
mu.mem_write(0x1000, b"\x48\x31\xc0\x48\xff\xc0\xc3")  # xor rax,rax; inc rax; ret

# Hook all code execution; keep the handle so the hook can be removed again
trace_all = mu.hook_add(UC_HOOK_CODE, hook_code)
# End address 0x1000 + 6 is the ret, so emulation stops before it
mu.emu_start(0x1000, 0x1000 + 6)

# Remove the global hook first: leaving both hooks installed would call
# hook_code twice for every instruction inside the range below.
mu.hook_del(trace_all)

# Hook code in a specific range only
mu.hook_add(UC_HOOK_CODE, hook_code, begin=0x1000, end=0x1010)
mu.emu_start(0x1000, 0x1000 + 6)
Memory Hooks
def hook_mem_read(uc, access, address, size, value, user_data):
    """Log a memory read; intended as a UC_HOOK_MEM_READ callback.

    Args:
        uc: Emulator instance (unused).
        access: Access type passed by the hook (unused).
        address: Address being read; printed in hex.
        size: Size of the read; printed in decimal.
        value: Unused.
        user_data: Unused.
    """
    print(f">>> Memory READ at 0x{address:x}, size={size}")
def hook_mem_write(uc, access, address, size, value, user_data):
    """Log a memory write; intended as a UC_HOOK_MEM_WRITE callback.

    Args:
        uc: Emulator instance (unused).
        access: Access type passed by the hook (unused).
        address: Address being written; printed in hex.
        size: Size of the write; printed in decimal.
        value: Value being written; printed in hex.
        user_data: Unused.
    """
    print(f">>> Memory WRITE at 0x{address:x}, size={size}, value=0x{value:x}")
def hook_mem_invalid(uc, access, address, size, value, user_data):
    """Log an invalid memory access and map the page that contains it.

    Intended as a callback for the UC_HOOK_MEM_*_UNMAPPED hook types.

    Args:
        uc: Emulator instance; used to map the missing page.
        access: Access type passed by the hook (unused).
        address: Faulting address; printed in hex.
        size: Size of the access (unused).
        value: Unused.
        user_data: Unused.

    Returns:
        Always True, to request that emulation continue.
    """
    print(f">>> Invalid memory access at 0x{address:x}")
    # Map the memory to allow emulation to continue: round the address
    # down to a 4 KiB boundary and map exactly one page there.
    page_start = address & ~0xfff
    uc.mem_map(page_start, 0x1000)
    return True  # Continue emulation
# Install the three memory callbacks in a fixed order. The last entry
# covers both unmapped reads and unmapped writes with a single hook.
unmapped_access = UC_HOOK_MEM_READ_UNMAPPED | UC_HOOK_MEM_WRITE_UNMAPPED
for hook_type, callback in (
    (UC_HOOK_MEM_READ, hook_mem_read),
    (UC_HOOK_MEM_WRITE, hook_mem_write),
    (unmapped_access, hook_mem_invalid),
):
    mu.hook_add(hook_type, callback)
Interrupt / Syscall Hooks
def hook_interrupt(uc, intno, user_data):
    """Handle an interrupt; intended as a UC_HOOK_INTR callback.

    Only interrupt 0x80 (Linux ``int 0x80``) is acted on: the syscall
    number is read from EAX and printed, and emulation is stopped when
    it is 1 (sys_exit). Any other interrupt number is ignored.

    Args:
        uc: Emulator instance.
        intno: Interrupt number.
        user_data: Unused.
    """
    if intno != 0x80:  # Not a Linux int 0x80 syscall
        return

    eax = uc.reg_read(UC_X86_REG_EAX)
    print(f">>> Syscall number: {eax}")
    if eax == 1:  # sys_exit
        uc.emu_stop()
def hook_syscall(uc, user_data):
    """Print RAX, RDI and RSI when an x86-64 ``syscall`` is hooked.

    Intended as a UC_HOOK_INSN callback.

    Args:
        uc: Emulator instance; used to read the registers.
        user_data: Unused.
    """
    rax = uc.reg_read(UC_X86_REG_RAX)
    rdi = uc.reg_read(UC_X86_REG_RDI)
    rsi = uc.reg_read(UC_X86_REG_RSI)
    print(f">>> syscall: rax={rax}, rdi=0x{rdi:x}, rsi=0x{rsi:x}")
# Route interrupts to hook_interrupt
mu.hook_add(UC_HOOK_INTR, hook_interrupt)
# Instruction-level hook; arg1 selects the hooked instruction (here: syscall)
mu.hook_add(UC_HOOK_INSN, hook_syscall, arg1=UC_X86_INS_SYSCALL)
Emulation Control
from unicorn import *
from unicorn.x86_const import *
mu = Uc(UC_ARCH_X86, UC_MODE_64)
mu.mem_map(0x1000, 0x1000)
code = b"\x90" * 100 + b"\xc3"  # 100 NOPs + ret
mu.mem_write(0x1000, code)

# Stop at the trailing ret instead of past it: no stack is mapped and RSP
# is never set, so executing ret would read its return address from
# unmapped memory and abort the run with an error.
END_ADDR = 0x1000 + len(code) - 1

# Start emulation
mu.emu_start(0x1000, END_ADDR)
# Start with timeout (microseconds)
mu.emu_start(0x1000, END_ADDR, timeout=5000000)  # 5 seconds
# Start with instruction count limit
mu.emu_start(0x1000, END_ADDR, count=50)  # Max 50 instructions
# Stop emulation from within a hook
def hook_stop(uc, address, size, user_data):
    """Stop emulation when address 0x1050 is reached.

    Intended as a UC_HOOK_CODE callback.

    Args:
        uc: Emulator instance; ``emu_stop()`` is called on it.
        address: Address passed by the hook; compared against 0x1050.
        size: Unused.
        user_data: Unused.
    """
    if address == 0x1050:
        uc.emu_stop()
# Install hook_stop as a UC_HOOK_CODE callback on mu
mu.hook_add(UC_HOOK_CODE, hook_stop)
Practical: Emulating a Function
from unicorn import *
from unicorn.x86_const import *
def emulate_function(code, args, base=0x400000, stack=0x7fff0000):
    """Emulate an x86-64 function with arguments and return its RAX value.

    Args:
        code: Machine code of the function; written at ``base``.
        args: Values for the argument registers, assigned in the order
            RDI, RSI, RDX, RCX, R8, R9. Values beyond the sixth are ignored.
        base: Address where a 0x10000-byte code region is mapped.
        stack: Address where a 0x10000-byte stack region is mapped.

    Returns:
        The value of RAX after emulation ends.
    """
    mu = Uc(UC_ARCH_X86, UC_MODE_64)

    # Map code and stack
    mu.mem_map(base, 0x10000)
    mu.mem_map(stack, 0x10000)
    mu.mem_write(base, code)

    # Set up stack with return address: a sentinel is stored at the top of
    # the stack so the function's final ret jumps to it; the same value is
    # passed to emu_start as the end address.
    ret_addr = 0xdeadbeef
    sp = stack + 0x8000
    mu.mem_write(sp, ret_addr.to_bytes(8, "little"))
    mu.reg_write(UC_X86_REG_RSP, sp)

    # x86-64 calling convention: RDI, RSI, RDX, RCX, R8, R9
    arg_regs = (
        UC_X86_REG_RDI,
        UC_X86_REG_RSI,
        UC_X86_REG_RDX,
        UC_X86_REG_RCX,
        UC_X86_REG_R8,
        UC_X86_REG_R9,
    )
    # zip stops at the shorter sequence, so at most six registers are set
    for reg, arg in zip(arg_regs, args):
        mu.reg_write(reg, arg)

    # Emulate until ret; timeout is in microseconds (10 seconds here)
    mu.emu_start(base, ret_addr, timeout=10000000)
    return mu.reg_read(UC_X86_REG_RAX)
# Example: emulate add(a, b) -> a + b
# Encoded instructions: mov rax,rdi; add rax,rsi; ret
add_code = b"\x48\x89\xf8\x48\x01\xf0\xc3"
operands = [10, 20]
result = emulate_function(add_code, operands)
print(f"Result: {result}")  # Result: 30