Salta ai contenuti

PyGhidra

PyGhidra (precedentemente pyhidra) fornisce un’interfaccia Python al motore di analisi di Ghidra: consente di eseguire analisi headless e script, accedere all’API del programma, iterare sulle funzioni, analizzare i riferimenti incrociati e decompilare il codice, il tutto da Python e senza la GUI di Ghidra.

Installazione

# Install the pyghidra Python package
pip install pyghidra

# pyghidra does not include Ghidra itself: Ghidra must be installed separately
# Download from https://ghidra-sre.org

# Point pyghidra at the Ghidra installation directory
# (adjust the path to wherever Ghidra was unpacked)
export GHIDRA_INSTALL_DIR="/opt/ghidra"

# Or configure in Python: pass the installation directory explicitly.
# The keyword accepted by pyghidra.start() is `install_dir`.
from pathlib import Path

import pyghidra

pyghidra.start(install_dir=Path("/opt/ghidra"))

Avvio PyGhidra

Inizializzazione di base

from pathlib import Path

import pyghidra

# Start the Ghidra JVM using the default installation lookup
# (e.g. the GHIDRA_INSTALL_DIR environment variable)
pyghidra.start()

# Alternative: start with a specific Ghidra installation.
# The keyword is `install_dir`. Pick one of the two forms in a real script.
pyghidra.start(install_dir=Path("/opt/ghidra_11.3"))

Analisi headless

import pyghidra

pyghidra.start()

# Import the binary and expose it through the flat program API
with pyghidra.open_program("/path/to/binary") as flat_api:
    prog = flat_api.getCurrentProgram()

    # Basic identification of the loaded program
    prog_name = prog.getName()
    print(f"Program: {prog_name}")
    lang_id = prog.getLanguageID()
    print(f"Language: {lang_id}")
    compiler_id = prog.getCompilerSpec().getCompilerSpecID()
    print(f"Compiler: {compiler_id}")

    # The listing gives access to the disassembly
    listing = prog.getListing()

    # Dump every memory block with its address range and R/W/X flags
    memory = prog.getMemory()
    for blk in memory.getBlocks():
        blk_name = blk.getName()
        first = blk.getStart().getOffset()
        last = blk.getEnd().getOffset()
        perms = ""
        if blk.isRead():
            perms += "R"
        if blk.isWrite():
            perms += "W"
        if blk.isExecute():
            perms += "X"
        print(f"  {blk_name}: 0x{first:x} - 0x{last:x} ({perms})")

Iterazione funzioni

import pyghidra

pyghidra.start()

with pyghidra.open_program("/path/to/binary") as flat_api:
    func_mgr = flat_api.getCurrentProgram().getFunctionManager()

    # Walk every function in ascending address order (True = forward)
    for fn in func_mgr.getFunctions(True):
        offset = fn.getEntryPoint().getOffset()
        fn_name = fn.getName()
        n_addrs = fn.getBody().getNumAddresses()
        n_params = fn.getParameterCount()
        print(f"0x{offset:x}: {fn_name} (size={n_addrs}, params={n_params})")

    # Overall function count
    print(f"\nTotal functions: {func_mgr.getFunctionCount()}")

    # Look up the first function called "main", if any
    main_fn = next(
        (fn for fn in func_mgr.getFunctions(True) if fn.getName() == "main"),
        None,
    )
    if main_fn is not None:
        print(f"main() at 0x{main_fn.getEntryPoint().getOffset():x}")

Riferimenti incrociati

import pyghidra

# The JVM must be running before any `ghidra.*` import: the Java packages
# only become importable once PyGhidra has launched Ghidra.
pyghidra.start()

# Not used below; available for filtering references by kind.
from ghidra.program.model.symbol import RefType  # noqa: E402,F401

with pyghidra.open_program("/path/to/binary") as flat_api:
    program = flat_api.getCurrentProgram()
    ref_mgr = program.getReferenceManager()
    fm = program.getFunctionManager()

    # Address whose incoming/outgoing references are inspected
    addr = flat_api.toAddr(0x401000)

    # Get references TO the address
    print(f"References to 0x{addr.getOffset():x}:")
    for ref in ref_mgr.getReferencesTo(addr):
        from_addr = ref.getFromAddress()
        ref_type = ref.getReferenceType()
        print(f"  <- 0x{from_addr.getOffset():x} ({ref_type})")

    # Get references FROM the address
    print(f"\nReferences from 0x{addr.getOffset():x}:")
    for ref in ref_mgr.getReferencesFrom(addr):
        to_addr = ref.getToAddress()
        print(f"  -> 0x{to_addr.getOffset():x} ({ref.getReferenceType()})")

    # Call-graph neighbours, only if a function starts exactly at the address
    func = fm.getFunctionAt(addr)
    if func:
        # Find all callers of the function (None = no task monitor)
        print(f"\nCallers of {func.getName()}:")
        for caller in func.getCallingFunctions(None):
            print(f"  {caller.getName()} @ 0x{caller.getEntryPoint().getOffset():x}")

        # Find all functions called by the function
        print(f"\n{func.getName()} calls:")
        for callee in func.getCalledFunctions(None):
            print(f"  {callee.getName()} @ 0x{callee.getEntryPoint().getOffset():x}")

Decompilazione

import pyghidra

# The JVM must be running before any `ghidra.*` import.
pyghidra.start()

from ghidra.app.decompiler import DecompInterface  # noqa: E402

# Per-function decompilation timeout, in seconds
DECOMPILE_TIMEOUT_SECS = 30

with pyghidra.open_program("/path/to/binary") as flat_api:
    program = flat_api.getCurrentProgram()
    fm = program.getFunctionManager()

    # Set up the decompiler; dispose() in `finally` so the decompiler
    # is released even if something below raises.
    decomp = DecompInterface()
    try:
        # openProgram() reports failure through its boolean return value
        if not decomp.openProgram(program):
            raise RuntimeError(
                f"Decompiler could not open program: {decomp.getLastMessage()}"
            )

        # Decompile a specific function
        func = fm.getFunctionAt(flat_api.toAddr(0x401000))
        if func:
            results = decomp.decompileFunction(func, DECOMPILE_TIMEOUT_SECS, None)
            if results.decompileCompleted():
                c_code = results.getDecompiledFunction().getC()
                print(c_code)
            else:
                print(f"Decompilation failed: {results.getErrorMessage()}")

        # Decompile all functions; functions that fail to decompile are skipped
        for func in fm.getFunctions(True):
            results = decomp.decompileFunction(func, DECOMPILE_TIMEOUT_SECS, None)
            if results.decompileCompleted():
                code = results.getDecompiledFunction().getC()
                print(f"// Function: {func.getName()}")
                print(code)
                print()
    finally:
        decomp.dispose()

Stringhe e dati

import pyghidra

# The JVM must be running before any `ghidra.*` import.
pyghidra.start()

# Not used below; kept from the original snippet.
from ghidra.program.model.data import StringDataType  # noqa: E402,F401

with pyghidra.open_program("/path/to/binary") as flat_api:
    program = flat_api.getCurrentProgram()
    listing = program.getListing()

    # Find all defined strings. hasStringValue() asks the data type itself,
    # so string types whose name lacks "string" (e.g. "unicode") are included.
    for data in listing.getDefinedData(True):
        if data.hasStringValue():
            value = data.getValue()
            addr = data.getAddress()
            print(f"0x{addr.getOffset():x}: {value}")

    # Search memory for a specific byte sequence
    memory = program.getMemory()
    search_bytes = b"password"
    addr = memory.findBytes(
        program.getMinAddress(),
        search_bytes,
        None,  # mask
        True,  # forward
        None   # monitor
    )
    if addr:
        print(f"Found 'password' at 0x{addr.getOffset():x}")

Patching

import pyghidra

# The JVM must be running before any `ghidra.*` import.
pyghidra.start()

from ghidra.program.model.mem import MemoryAccessException  # noqa: E402

with pyghidra.open_program("/path/to/binary") as flat_api:
    program = flat_api.getCurrentProgram()
    memory = program.getMemory()

    # Read bytes. A Python bytearray handed to Memory.getBytes() is not filled
    # in place, so take the Java byte[] returned by the flat API instead.
    addr = flat_api.toAddr(0x401000)
    raw = flat_api.getBytes(addr, 16)
    # Java bytes are signed (-128..127); mask to get unsigned values
    original = bytes(b & 0xFF for b in raw)
    print(f"Original bytes: {original.hex()}")

    # Write/patch bytes (within a transaction)
    tx_id = program.startTransaction("Patch NOP")
    committed = False
    try:
        # NOP out an instruction (x86)
        memory.setBytes(addr, bytes([0x90, 0x90, 0x90, 0x90, 0x90]))
        committed = True
    except MemoryAccessException as e:
        print(f"Patch failed: {e}")
    finally:
        # End the transaction exactly once: True = commit, False = rollback
        program.endTransaction(tx_id, committed)

    # Save the modified program only when the patch was actually applied
    if committed:
        program.save("Patched binary", None)

Gestione progetti Ghidra

import pyghidra

# The JVM must be running before any Java import (`java.*`, `ghidra.*`).
pyghidra.start()

# Not used below; kept from the original snippet.
from java.io import File  # noqa: E402,F401

# Location and name of the Ghidra project that will hold the program
project_dir = "/tmp/ghidra_projects"
project_name = "my_analysis"

with pyghidra.open_program(
    "/path/to/binary",
    project_location=project_dir,
    project_name=project_name,
    analyze=True,  # Run auto-analysis
) as flat_api:
    program = flat_api.getCurrentProgram()

    # Program is now analyzed and available
    fm = program.getFunctionManager()
    print(f"Functions found: {fm.getFunctionCount()}")

Esecuzione script

import pyghidra

pyghidra.start()

# Import after the JVM is started; the original referenced
# `ghidra.program.model.symbol.SourceType` without importing `ghidra`.
from ghidra.program.model.symbol import SourceType  # noqa: E402

# Run script-style operations on a binary
with pyghidra.open_program("/path/to/binary") as flat_api:
    # Using flat API methods (same as GhidraScript)
    program = flat_api.getCurrentProgram()

    # Find a byte pattern. find(Address, byte[]) is the byte-search overload;
    # the single-argument find() searches for text.
    addr = flat_api.find(program.getMinAddress(), b"\x48\x89\xe5")  # mov rbp, rsp
    if addr:
        print(f"Pattern found at: 0x{addr.getOffset():x}")

    # Get instruction at address
    insn = flat_api.getInstructionAt(flat_api.toAddr(0x401000))
    if insn:
        print(f"Instruction: {insn}")
        print(f"Mnemonic: {insn.getMnemonicString()}")

    # Create a label/symbol inside a transaction
    tx_id = program.startTransaction("Add label")
    committed = False
    try:
        sym_table = program.getSymbolTable()
        sym_table.createLabel(
            flat_api.toAddr(0x401000),
            "my_interesting_function",
            SourceType.USER_DEFINED,
        )
        committed = True
    except Exception as e:
        # Report the failure instead of silently rolling back
        print(f"Failed to create label: {e}")
    finally:
        # End the transaction exactly once: True = commit, False = rollback
        program.endTransaction(tx_id, committed)

Analisi batch

import pyghidra
import os

# Start the Ghidra JVM before any program is opened below
pyghidra.start()

def analyze_binary(filepath: str) -> dict:
    """Analyze a single binary and extract key info.

    Args:
        filepath: Path of the binary to open with ``pyghidra.open_program``.

    Returns:
        A dict with the keys ``name`` (program name), ``arch`` (language ID
        as a string), ``functions`` (function count), ``imports`` (names of
        the external symbols) and ``exports`` (names of functions whose
        symbol is marked as an external entry point).
    """
    with pyghidra.open_program(filepath) as flat_api:
        program = flat_api.getCurrentProgram()
        fm = program.getFunctionManager()
        sym_table = program.getSymbolTable()

        # Imports: every symbol living in the external space
        imports = [sym.getName() for sym in sym_table.getExternalSymbols()]

        # Exports: only functions flagged as external entry points.
        # Without that check every local function would be listed.
        exports = [
            func.getName()
            for func in fm.getFunctions(True)
            if not func.isExternal()
            and not func.isThunk()
            and func.getSymbol().isExternalEntryPoint()
        ]

        return {
            "name": program.getName(),
            "arch": str(program.getLanguageID()),
            "functions": fm.getFunctionCount(),
            "imports": imports,
            "exports": exports,
        }

# Analyze all binaries in a directory
samples_dir = "/path/to/samples"
for fname in sorted(os.listdir(samples_dir)):
    fpath = os.path.join(samples_dir, fname)
    # Skip sub-directories, such as project folders left beside the samples
    # by earlier runs; only regular files are candidate binaries.
    if not os.path.isfile(fpath):
        continue
    try:
        result = analyze_binary(fpath)
    except Exception as e:
        # Best-effort batch: report the failure and move on to the next file
        print(f"Failed to analyze {fname}: {e}")
    else:
        print(f"{result['name']}: {result['functions']} functions, "
              f"{len(result['imports'])} imports")