Zum Inhalt springen

PyGhidra

PyGhidra (ehemals pyhidra) bietet eine Python-Schnittstelle zu Ghidras Analyse-Engine. Damit lassen sich Headless-Analysen und Skripte ausführen, die Programm-API nutzen, über Funktionen iterieren, Querverweise analysieren und Code dekompilieren — alles aus Python heraus, ohne die Ghidra-GUI.

Installation

# Install pyghidra
pip install pyghidra

# Requires Ghidra to be installed separately
# Download from https://ghidra-sre.org

# Set Ghidra installation path
export GHIDRA_INSTALL_DIR="/opt/ghidra"

# Or configure in Python. The keyword accepted by pyghidra.start() is
# `install_dir` (a filesystem path to the Ghidra installation).
from pathlib import Path

import pyghidra

pyghidra.start(install_dir=Path("/opt/ghidra"))

PyGhidra starten

Basic Initialization

from pathlib import Path

import pyghidra

# Start the Ghidra JVM (installation is located via GHIDRA_INSTALL_DIR)
pyghidra.start()

# Start with a specific Ghidra installation instead. The keyword is
# `install_dir`; passing `ghidra_install_dir` raises a TypeError.
pyghidra.start(install_dir=Path("/opt/ghidra_11.3"))

Headless Analysis

import pyghidra

pyghidra.start()

# Open and analyze a binary, then print a short summary of it
with pyghidra.open_program("/path/to/binary") as flat_api:
    prog = flat_api.getCurrentProgram()
    print(f"Program: {prog.getName()}")
    print(f"Language: {prog.getLanguageID()}")
    print(f"Compiler: {prog.getCompilerSpec().getCompilerSpecID()}")

    # The listing is the entry point to the disassembly
    listing = prog.getListing()

    # One line per memory block: name, address range, permission flags
    for blk in prog.getMemory().getBlocks():
        blk_name = blk.getName()
        first = blk.getStart().getOffset()
        last = blk.getEnd().getOffset()
        perms = "".join(
            flag
            for flag, enabled in (
                ("R", blk.isRead()),
                ("W", blk.isWrite()),
                ("X", blk.isExecute()),
            )
            if enabled
        )
        print(f"  {blk_name}: 0x{first:x} - 0x{last:x} ({perms})")

Funktionsiteration

import pyghidra

pyghidra.start()

with pyghidra.open_program("/path/to/binary") as flat_api:
    func_mgr = flat_api.getCurrentProgram().getFunctionManager()

    # Print one summary line per function (True = forward direction)
    for fn in func_mgr.getFunctions(True):
        start = fn.getEntryPoint().getOffset()
        label = fn.getName()
        n_addrs = fn.getBody().getNumAddresses()
        n_params = fn.getParameterCount()
        print(f"0x{start:x}: {label} (size={n_addrs}, params={n_params})")

    # Overall function count as reported by the function manager
    print(f"\nTotal functions: {func_mgr.getFunctionCount()}")

    # Look up the first function whose name is exactly "main"
    main = next(
        (fn for fn in func_mgr.getFunctions(True) if fn.getName() == "main"),
        None,
    )
    if main is not None:
        print(f"main() at 0x{main.getEntryPoint().getOffset():x}")

Cross-References

import pyghidra

pyghidra.start()

# Ghidra/Java packages can only be imported once the JVM is running,
# so this import must come after pyghidra.start().
from ghidra.program.model.symbol import RefType

with pyghidra.open_program("/path/to/binary") as flat_api:
    program = flat_api.getCurrentProgram()
    ref_mgr = program.getReferenceManager()
    fm = program.getFunctionManager()

    addr = flat_api.toAddr(0x401000)

    # Get references TO an address
    print(f"References to 0x{addr.getOffset():x}:")
    for ref in ref_mgr.getReferencesTo(addr):
        from_addr = ref.getFromAddress()
        ref_type = ref.getReferenceType()
        print(f"  <- 0x{from_addr.getOffset():x} ({ref_type})")

    # Get references FROM an address
    print(f"\nReferences from 0x{addr.getOffset():x}:")
    for ref in ref_mgr.getReferencesFrom(addr):
        to_addr = ref.getToAddress()
        print(f"  -> 0x{to_addr.getOffset():x} ({ref.getReferenceType()})")

    # getFunctionAt() yields None when no function starts at the address
    func = fm.getFunctionAt(addr)
    if func is not None:
        # Find all callers of the function (None = no task monitor)
        print(f"\nCallers of {func.getName()}:")
        for caller in func.getCallingFunctions(None):
            print(f"  {caller.getName()} @ 0x{caller.getEntryPoint().getOffset():x}")

        # Find all functions called by the function
        print(f"\n{func.getName()} calls:")
        for callee in func.getCalledFunctions(None):
            print(f"  {callee.getName()} @ 0x{callee.getEntryPoint().getOffset():x}")

Dekompilierung

import pyghidra

pyghidra.start()

# Ghidra/Java packages can only be imported once the JVM is running,
# so this import must come after pyghidra.start().
from ghidra.app.decompiler import DecompInterface

with pyghidra.open_program("/path/to/binary") as flat_api:
    program = flat_api.getCurrentProgram()
    fm = program.getFunctionManager()

    # Set up the decompiler. dispose() goes in `finally` so the decompiler
    # is released even when something below raises.
    decomp = DecompInterface()
    try:
        # openProgram() reports failure through its boolean return value
        if not decomp.openProgram(program):
            raise RuntimeError(
                f"Decompiler could not open program: {decomp.getLastMessage()}"
            )

        # Decompile a specific function
        func = fm.getFunctionAt(flat_api.toAddr(0x401000))
        if func is not None:
            results = decomp.decompileFunction(func, 30, None)  # 30s timeout
            if results.decompileCompleted():
                print(results.getDecompiledFunction().getC())
            else:
                print(f"Decompilation failed: {results.getErrorMessage()}")

        # Decompile all functions; ones that do not complete are skipped
        for func in fm.getFunctions(True):
            results = decomp.decompileFunction(func, 30, None)
            if results.decompileCompleted():
                print(f"// Function: {func.getName()}")
                print(results.getDecompiledFunction().getC())
                print()
    finally:
        decomp.dispose()

Zeichenketten und Daten

import pyghidra

pyghidra.start()

# Ghidra/Java packages can only be imported once the JVM is running,
# so this import must come after pyghidra.start().
from ghidra.program.model.data import StringDataType

with pyghidra.open_program("/path/to/binary") as flat_api:
    program = flat_api.getCurrentProgram()
    listing = program.getListing()

    # Find all defined strings. hasStringValue() asks the data item itself
    # instead of guessing from the data type's name, which misses string
    # types whose name does not contain "string".
    for data in listing.getDefinedData(True):
        if data.hasStringValue():
            print(f"0x{data.getAddress().getOffset():x}: {data.getValue()}")

    # Search memory for a specific byte string, starting at the lowest address
    memory = program.getMemory()
    search_bytes = b"password"
    addr = memory.findBytes(
        program.getMinAddress(),
        search_bytes,
        None,  # mask
        True,  # forward
        None   # monitor
    )
    # findBytes() gives None when there is no match
    if addr is not None:
        print(f"Found 'password' at 0x{addr.getOffset():x}")

Patching

import pyghidra

pyghidra.start()

# Ghidra/Java packages can only be imported once the JVM is running,
# so this import must come after pyghidra.start().
from ghidra.program.model.mem import MemoryAccessException

with pyghidra.open_program("/path/to/binary") as flat_api:
    program = flat_api.getCurrentProgram()
    memory = program.getMemory()

    # Read bytes. A Python bytearray handed to a Java byte[] parameter is
    # copied, so Java never fills the Python buffer; ask the flat API for a
    # Java array instead and convert it.
    addr = flat_api.toAddr(0x401000)
    original = flat_api.getBytes(addr, 16)
    # Java bytes are signed; mask to 0..255 before building Python bytes
    print(f"Original bytes: {bytes(b & 0xFF for b in original).hex()}")

    # Write/patch bytes (within a transaction)
    tx_id = program.startTransaction("Patch NOP")
    committed = False
    try:
        # NOP out an instruction (x86)
        memory.setBytes(addr, bytes([0x90, 0x90, 0x90, 0x90, 0x90]))
        committed = True
    except Exception as e:
        print(f"Patch failed: {e}")
    finally:
        # End the transaction exactly once: True = commit, False = rollback
        program.endTransaction(tx_id, committed)

    # Save the modified program
    program.save("Patched binary", None)

Ghidra-Projektverwaltung

import pyghidra

pyghidra.start()

# Java packages can only be imported once the JVM is running,
# so this import must come after pyghidra.start().
from java.io import File

# Create a new Ghidra project
project_dir = "/tmp/ghidra_projects"
project_name = "my_analysis"

with pyghidra.open_program(
    "/path/to/binary",
    project_location=project_dir,
    project_name=project_name,
    analyze=True  # Run auto-analysis
) as flat_api:
    program = flat_api.getCurrentProgram()

    # Program is now analyzed and available
    fm = program.getFunctionManager()
    print(f"Functions found: {fm.getFunctionCount()}")

Skriptausführung

import pyghidra

pyghidra.start()

# Ghidra/Java packages can only be imported once the JVM is running.
# SourceType must be imported explicitly: the bare name `ghidra` is not
# defined by `import pyghidra`.
from ghidra.program.model.symbol import SourceType

# Run a Ghidra script on a binary
with pyghidra.open_program("/path/to/binary") as flat_api:
    # Using flat API methods (same as GhidraScript)
    program = flat_api.getCurrentProgram()

    # Find bytes pattern. The byte-array overload of find() needs a start
    # address; search from the lowest address in the program.
    addr = flat_api.find(program.getMinAddress(), b"\x48\x89\xe5")  # mov rbp, rsp
    if addr is not None:
        print(f"Pattern found at: 0x{addr.getOffset():x}")

    # Get instruction at address
    insn = flat_api.getInstructionAt(flat_api.toAddr(0x401000))
    if insn is not None:
        print(f"Instruction: {insn}")
        print(f"Mnemonic: {insn.getMnemonicString()}")

    # Create a label/symbol inside a transaction
    tx_id = program.startTransaction("Add label")
    committed = False
    try:
        program.getSymbolTable().createLabel(
            flat_api.toAddr(0x401000),
            "my_interesting_function",
            SourceType.USER_DEFINED,
        )
        committed = True
    except Exception as e:
        # Report the failure instead of silently dropping it
        print(f"Failed to add label: {e}")
    finally:
        # End the transaction exactly once: True = commit, False = rollback
        program.endTransaction(tx_id, committed)

Stapelanalyse

import pyghidra
import os

pyghidra.start()

def analyze_binary(filepath):
    """Analyze a single binary and extract key info.

    Args:
        filepath: Path of the binary, passed to ``pyghidra.open_program``.

    Returns:
        A dict with the keys ``"name"`` (program name), ``"arch"`` (language
        ID as a string), ``"functions"`` (function count), ``"imports"``
        (names of all external symbols) and ``"exports"`` (names of
        non-external, non-thunk functions located at an external entry
        point).
    """
    with pyghidra.open_program(filepath) as flat_api:
        program = flat_api.getCurrentProgram()
        fm = program.getFunctionManager()
        sym_table = program.getSymbolTable()

        # Get imports
        imports = [sym.getName() for sym in sym_table.getExternalSymbols()]

        # Get exported functions: only functions whose entry point is
        # flagged as an external entry point count as exports. Without this
        # check every internal function would be listed.
        exports = [
            func.getName()
            for func in fm.getFunctions(True)
            if not func.isExternal()
            and not func.isThunk()
            and sym_table.isExternalEntryPoint(func.getEntryPoint())
        ]

        return {
            "name": program.getName(),
            "arch": str(program.getLanguageID()),
            "functions": fm.getFunctionCount(),
            "imports": imports,
            "exports": exports,
        }

# Walk the sample directory and summarize every entry; a failure on one
# file is reported and does not stop the batch.
samples_dir = "/path/to/samples"
for entry in os.listdir(samples_dir):
    try:
        summary = analyze_binary(os.path.join(samples_dir, entry))
        n_funcs = summary["functions"]
        n_imports = len(summary["imports"])
        print(f"{summary['name']}: {n_funcs} functions, {n_imports} imports")
    except Exception as exc:
        print(f"Failed to analyze {entry}: {exc}")