Ir al contenido

Comandos de Keystone

Keystone es un motor de ensamblado ligero y multiarquitectura. Proporciona una API simple y unificada para ensamblar instrucciones de múltiples arquitecturas, incluidas x86, ARM, MIPS y más.

Instalación

# Python binding (installed with pip)
pip install keystone-engine

# From source: clone the repository, then configure and build inside a
# separate build/ directory as a Release-mode shared library.
# make runs one job per CPU reported by nproc.
git clone https://github.com/keystone-engine/keystone.git
cd keystone
mkdir build && cd build
cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON ..
make -j$(nproc)
sudo make install

# Update linker cache
sudo ldconfig

# Install Python binding from source
# (the path is relative to the build/ directory entered above)
cd ../bindings/python
pip install .

# macOS: install the library with Homebrew, then the Python binding with pip
brew install keystone
pip install keystone-engine

API de Python: ensamblado básico

Ensamblado x86-64

from keystone import *

# Create assembler for x86-64
ks = Ks(KS_ARCH_X86, KS_MODE_64)

# Assemble a single instruction.
# The result is unpacked into (encoding, count); encoding is passed to
# bytes() below, so it is presumably a sequence of byte values -- confirm
# against the binding's documentation.
encoding, count = ks.asm("mov rax, rbx")
print(f"Bytes: {bytes(encoding).hex()}")
print(f"Instructions assembled: {count}")

# Assemble multiple instructions (semicolon-separated)
# The same engine object is reused; encoding and count are overwritten.
encoding, count = ks.asm("push rbp; mov rbp, rsp; sub rsp, 0x20; pop rbp; ret")
print(f"Bytes: {bytes(encoding).hex()}")
print(f"Instructions: {count}")

x86 32 bits

from keystone import *

# Same architecture constant as the 64-bit case; only the mode changes,
# and the source uses the 32-bit register names (ebp, esp, eax).
ks = Ks(KS_ARCH_X86, KS_MODE_32)
# Five semicolon-separated statements assembled in a single call.
encoding, count = ks.asm("push ebp; mov ebp, esp; xor eax, eax; pop ebp; ret")
print(f"Shellcode: {bytes(encoding).hex()}")

Ensamblado ARM

from keystone import *

# Each instruction set below gets its own engine instance; the
# architecture/mode pair is fixed when the Ks object is constructed.

# ARM 32-bit
ks_arm = Ks(KS_ARCH_ARM, KS_MODE_ARM)
encoding, count = ks_arm.asm("mov r0, #5; mov r1, #10; add r2, r0, r1; bx lr")
print(f"ARM: {bytes(encoding).hex()}")

# ARM Thumb mode (same architecture constant, different mode)
ks_thumb = Ks(KS_ARCH_ARM, KS_MODE_THUMB)
encoding, count = ks_thumb.asm("movs r0, #5; movs r1, #10; adds r2, r0, r1")
print(f"Thumb: {bytes(encoding).hex()}")

# AArch64 (separate architecture constant; the mode only selects endianness)
ks_arm64 = Ks(KS_ARCH_ARM64, KS_MODE_LITTLE_ENDIAN)
encoding, count = ks_arm64.asm("mov x0, #5; mov x1, #10; add x2, x0, x1; ret")
print(f"AArch64: {bytes(encoding).hex()}")

Soporte de arquitecturas

from keystone import *

# Architecture and mode constants
# KS_ARCH_X86      - x86/x86-64
# KS_ARCH_ARM      - ARM (32-bit)
# KS_ARCH_ARM64    - AArch64
# KS_ARCH_MIPS     - MIPS
# KS_ARCH_PPC      - PowerPC
# KS_ARCH_SPARC    - SPARC
# KS_ARCH_SYSTEMZ  - SystemZ
# KS_ARCH_HEXAGON  - Hexagon
# KS_ARCH_EVM      - Ethereum VM

# Mode constants
# KS_MODE_32             - 32-bit
# KS_MODE_64             - 64-bit
# KS_MODE_ARM            - ARM mode
# KS_MODE_THUMB          - Thumb mode
# KS_MODE_MIPS32         - MIPS 32-bit (used in the MIPS example below)
# KS_MODE_PPC64          - PowerPC 64-bit (used in the PowerPC example below)
# KS_MODE_LITTLE_ENDIAN  - Little endian (default)
# KS_MODE_BIG_ENDIAN     - Big endian
#
# A width/variant mode and an endianness mode are combined with `+`,
# as the two examples below do.

# MIPS 32-bit little-endian
ks_mips = Ks(KS_ARCH_MIPS, KS_MODE_MIPS32 + KS_MODE_LITTLE_ENDIAN)
encoding, count = ks_mips.asm("addiu $t0, $zero, 5")
print(f"MIPS: {bytes(encoding).hex()}")

# PowerPC 64-bit big-endian
ks_ppc = Ks(KS_ARCH_PPC, KS_MODE_PPC64 + KS_MODE_BIG_ENDIAN)
encoding, count = ks_ppc.asm("li 3, 1; li 0, 1; sc")
print(f"PPC64: {bytes(encoding).hex()}")

Resolución de símbolos

from keystone import *

ks = Ks(KS_ARCH_X86, KS_MODE_64)

# Assemble with labels
# The source is a multi-line string with one statement per line instead of
# semicolon separators. `jz done` refers to a label that is only defined
# further down in the same string, so the assembler has to resolve a
# forward reference; `start` is defined but not referenced.
code = """
    start:
        xor rax, rax
        test rdi, rdi
        jz done
        mov rax, rdi
        add rax, rsi
    done:
        ret
"""
encoding, count = ks.asm(code)
print(f"Bytes: {bytes(encoding).hex()}")
print(f"Instructions: {count}")

Resolutor de símbolos personalizado

from keystone import *

# Define a symbol resolver callback
def sym_resolver(symbol, value):
    """Resolve an external symbol name to an address.

    Keystone invokes this callback for symbols it cannot resolve itself.
    The address is handed back through the ``value`` out-parameter, not
    through the return value.

    Args:
        symbol: Symbol name as ``bytes`` (e.g. ``b"printf"``).
        value: Writable, indexable out-parameter; the resolved address is
            stored in ``value[0]``. It is left untouched when the symbol
            is unknown.

    Returns:
        ``True`` if the symbol was resolved, ``False`` otherwise.
    """
    symbols = {
        b"printf": 0x7fff00001000,
        b"malloc": 0x7fff00002000,
        b"free":   0x7fff00003000,
    }
    address = symbols.get(symbol)
    if address is None:
        # Unknown symbol: report "not handled" so the assembler can fail.
        return False
    # Write through the out-parameter; returning the address would lose it,
    # because the callback's return value is only a handled/not-handled flag.
    value[0] = address
    return True

ks = Ks(KS_ARCH_X86, KS_MODE_64)
# Register the callback by assigning it to the engine's sym_resolver attribute.
ks.sym_resolver = sym_resolver

# Assemble code referencing external symbols
# `printf` is not defined in the assembly text, so its address has to come
# from the resolver; addr is the address the code is assembled for.
# NOTE(review): the resolver's table addresses (0x7fff0000xxxx) lie far more
# than 2 GiB away from addr=0x400000, which is outside the reach of a
# 32-bit relative call -- confirm the resulting encoding is what you expect.
encoding, count = ks.asm("call printf", addr=0x400000)
print(f"Call printf: {bytes(encoding).hex()}")

Ensamblado en dirección específica

from keystone import *

ks = Ks(KS_ARCH_X86, KS_MODE_64)

# Specify the address for relative instruction encoding
# This matters for relative jumps and calls
# In both calls below the operand 0x401000 is the absolute target and
# addr=0x400000 is where the instruction itself is assumed to be located.
encoding, count = ks.asm("jmp 0x401000", addr=0x400000)
print(f"Jump from 0x400000 to 0x401000: {bytes(encoding).hex()}")

encoding, count = ks.asm("call 0x401000", addr=0x400000)
print(f"Call from 0x400000 to 0x401000: {bytes(encoding).hex()}")

Opciones de sintaxis

from keystone import *

# One engine is reused for all three cases; the input syntax is switched
# by assigning to ks.syntax before each asm() call. Each snippet loads
# rax from the same memory operand (rbp - 0x10), written in that syntax.
ks = Ks(KS_ARCH_X86, KS_MODE_64)

# Intel syntax (default)
ks.syntax = KS_OPT_SYNTAX_INTEL
encoding, _ = ks.asm("mov rax, [rbp-0x10]")
print(f"Intel: {bytes(encoding).hex()}")

# AT&T syntax (source operand first, % register prefix)
ks.syntax = KS_OPT_SYNTAX_ATT
encoding, _ = ks.asm("movq -0x10(%rbp), %rax")
print(f"AT&T: {bytes(encoding).hex()}")

# NASM syntax
ks.syntax = KS_OPT_SYNTAX_NASM
encoding, _ = ks.asm("mov rax, [rbp-0x10]")
print(f"NASM: {bytes(encoding).hex()}")

Manejo de errores

from keystone import *

ks = Ks(KS_ARCH_X86, KS_MODE_64)

# A deliberately unknown mnemonic exercises the failure path: the error is
# expected to surface as a KsError exception rather than as a return value.
# The handler prints the exception text and its numeric errno attribute.
try:
    encoding, count = ks.asm("invalid_instruction rax, rbx")
except KsError as e:
    print(f"Assembly error: {e}")
    print(f"Error code: {e.errno}")

# Check assembly count for partial failures
# count is compared against the number of statements in the source (3).
# NOTE(review): since failures raise KsError (see above), this branch may
# never be reached for a failing statement -- confirm how the binding
# reports a partially assembled input.
code = "nop; nop; nop"
encoding, count = ks.asm(code)
if count < 3:
    print(f"Warning: only {count} of 3 instructions assembled")

API de C

#include <keystone/keystone.h>
#include <stdio.h>

int main(void) {
    ks_engine *ks;
    unsigned char *encode;
    size_t size, count;

    // Initialize Keystone for x86-64
    if (ks_open(KS_ARCH_X86, KS_MODE_64, &ks) != KS_ERR_OK) {
        printf("Failed to initialize Keystone\n");
        return -1;
    }

    // Assemble instructions
    if (ks_asm(ks, "mov rax, rbx; ret", 0, &encode, &size, &count) != KS_ERR_OK) {
        printf("Assembly error: %s\n", ks_strerror(ks_errno(ks)));
    } else {
        printf("Assembled %zu instructions, %zu bytes: ", count, size);
        for (size_t i = 0; i < size; i++) {
            printf("%02x ", encode[i]);
        }
        printf("\n");
        ks_free(encode);
    }

    ks_close(ks);
    return 0;
}
# Compile (assumes the C program above is saved as asm.c) and link
# against the Keystone library
gcc asm.c -o asm -lkeystone

Integración: ensamblar, emular, desensamblar

from keystone import Ks, KS_ARCH_X86, KS_MODE_64
from capstone import Cs, CS_ARCH_X86, CS_MODE_64
from unicorn import Uc, UC_ARCH_X86, UC_MODE_64
from unicorn.x86_const import UC_X86_REG_RAX, UC_X86_REG_RDI, UC_X86_REG_RSI

# Step 1: Assemble
# Only the encoding is kept; the statement count is discarded.
ks = Ks(KS_ARCH_X86, KS_MODE_64)
code, _ = ks.asm("mov rax, rdi; add rax, rsi; ret")
shellcode = bytes(code)

# Step 2: Disassemble to verify
# 0x1000 is the address shown for the first instruction; it matches the
# address the code is loaded at in step 3.
cs = Cs(CS_ARCH_X86, CS_MODE_64)
for insn in cs.disasm(shellcode, 0x1000):
    print(f"0x{insn.address:x}: {insn.mnemonic} {insn.op_str}")

# Step 3: Emulate
# Map 0x1000 bytes at 0x1000, copy the code in, and preload the two inputs.
mu = Uc(UC_ARCH_X86, UC_MODE_64)
mu.mem_map(0x1000, 0x1000)
mu.mem_write(0x1000, shellcode)
mu.reg_write(UC_X86_REG_RDI, 40)
mu.reg_write(UC_X86_REG_RSI, 2)
# The end address is that of the last code byte, not one past the end.
# NOTE(review): presumably chosen so emulation stops before the trailing
# `ret`, since no stack memory is mapped here -- confirm.
mu.emu_start(0x1000, 0x1000 + len(shellcode) - 1)
print(f"Result: {mu.reg_read(UC_X86_REG_RAX)}")  # 42

Generación de shellcode

from keystone import *

ks = Ks(KS_ARCH_X86, KS_MODE_64)

# Linux x86-64 execve("/bin/sh") shellcode
# The 64-bit immediate 0x68732f2f6e69622f is the byte string "/bin//sh"
# in little-endian order; the zeroed rsi pushed before it ends up directly
# after the string on the stack.
# NOTE(review): `mov al, 59` writes only the low byte of rax and nothing
# in this snippet clears the rest of the register -- confirm that is
# acceptable for the intended context.
shellcode_asm = """
    xor rsi, rsi
    push rsi
    mov rdi, 0x68732f2f6e69622f
    push rdi
    mov rdi, rsp
    xor rdx, rdx
    mov al, 59
    syscall
"""

encoding, count = ks.asm(shellcode_asm)
sc = bytes(encoding)

# Print as C array
# All bytes are emitted on a single indented line as 0x-prefixed,
# two-digit hex values, followed by a comment with the total length.
print("unsigned char shellcode[] = {")
print("    " + ", ".join(f"0x{b:02x}" for b in sc))
print("};")
print(f"// Length: {len(sc)} bytes")