PyGhidra
PyGhidra (precedentemente pyhidra) fornisce un’interfaccia Python al motore di analisi di Ghidra, permettendo di eseguire analisi headless di Ghidra, eseguire script, accedere all’API del programma, iterare le funzioni, eseguire analisi dei riferimenti incrociati e decompilare il codice — tutto da Python senza la GUI di Ghidra.
Installazione
# Install pyghidra
pip install pyghidra
# Requires Ghidra to be installed separately
# Download from https://ghidra-sre.org
# Set Ghidra installation path
export GHIDRA_INSTALL_DIR="/opt/ghidra"
# Or configure in Python
import pyghidra
pyghidra.start(ghidra_install_dir="/opt/ghidra")
Avvio PyGhidra
Inizializzazione di base
import pyghidra
# Start the Ghidra JVM (no explicit installation directory is passed here)
pyghidra.start()
# Alternative: start with a specific Ghidra installation directory
pyghidra.start(ghidra_install_dir="/opt/ghidra_11.3")
Analisi headless
import pyghidra

pyghidra.start()

# Open a binary and print its basic properties and memory layout
with pyghidra.open_program("/path/to/binary") as flat_api:
    program = flat_api.getCurrentProgram()
    print(f"Program: {program.getName()}")
    print(f"Language: {program.getLanguageID()}")
    print(f"Compiler: {program.getCompilerSpec().getCompilerSpecID()}")

    # Listing object (disassembly view) of the program
    listing = program.getListing()

    # One line per memory block: name, address range and R/W/X flags
    memory = program.getMemory()
    for block in memory.getBlocks():
        start = block.getStart().getOffset()
        end = block.getEnd().getOffset()
        perms = "".join(
            flag
            for flag, enabled in (
                ("R", block.isRead()),
                ("W", block.isWrite()),
                ("X", block.isExecute()),
            )
            if enabled
        )
        print(f" {block.getName()}: 0x{start:x} - 0x{end:x} ({perms})")
Iterazione funzioni
import pyghidra

pyghidra.start()

with pyghidra.open_program("/path/to/binary") as flat_api:
    program = flat_api.getCurrentProgram()
    fm = program.getFunctionManager()

    # Walk every function; True requests forward iteration
    for fn in fm.getFunctions(True):
        entry_offset = fn.getEntryPoint().getOffset()
        fn_name = fn.getName()
        byte_count = fn.getBody().getNumAddresses()
        param_count = fn.getParameterCount()
        print(f"0x{entry_offset:x}: {fn_name} "
              f"(size={byte_count}, params={param_count})")

    # Report how many functions the function manager holds
    print(f"\nTotal functions: {fm.getFunctionCount()}")

    # Locate the first function named "main", if there is one
    main = next(
        (fn for fn in fm.getFunctions(True) if fn.getName() == "main"),
        None,
    )
    if main is not None:
        print(f"main() at 0x{main.getEntryPoint().getOffset():x}")
Riferimenti incrociati
import pyghidra

# The JVM must be running before any ghidra.* package can be imported,
# so start PyGhidra first and import Ghidra classes afterwards.
pyghidra.start()

from ghidra.program.model.symbol import RefType

with pyghidra.open_program("/path/to/binary") as flat_api:
    program = flat_api.getCurrentProgram()
    ref_mgr = program.getReferenceManager()
    fm = program.getFunctionManager()

    # Get references TO an address
    addr = flat_api.toAddr(0x401000)
    print(f"References to 0x{addr.getOffset():x}:")
    for ref in ref_mgr.getReferencesTo(addr):
        from_addr = ref.getFromAddress()
        ref_type = ref.getReferenceType()
        print(f" <- 0x{from_addr.getOffset():x} ({ref_type})")

    # Get references FROM an address
    print(f"\nReferences from 0x{addr.getOffset():x}:")
    for ref in ref_mgr.getReferencesFrom(addr):
        to_addr = ref.getToAddress()
        print(f" -> 0x{to_addr.getOffset():x} ({ref.getReferenceType()})")

    # Callers and callees only make sense when a function starts at the address
    func = fm.getFunctionAt(addr)
    if func is not None:
        # Find all callers of the function
        print(f"\nCallers of {func.getName()}:")
        for caller in func.getCallingFunctions(None):
            print(f" {caller.getName()} @ 0x{caller.getEntryPoint().getOffset():x}")

        # Find all functions called by the function
        print(f"\n{func.getName()} calls:")
        for callee in func.getCalledFunctions(None):
            print(f" {callee.getName()} @ 0x{callee.getEntryPoint().getOffset():x}")
Decompilazione
import pyghidra

# The JVM must be running before any ghidra.* package can be imported,
# so start PyGhidra first and import Ghidra classes afterwards.
pyghidra.start()

from ghidra.app.decompiler import DecompInterface

with pyghidra.open_program("/path/to/binary") as flat_api:
    program = flat_api.getCurrentProgram()
    fm = program.getFunctionManager()

    # Set up the decompiler; dispose() runs in `finally` so the decompiler
    # is released even if something below raises.
    decomp = DecompInterface()
    try:
        decomp.openProgram(program)

        # Decompile a specific function
        func = fm.getFunctionAt(flat_api.toAddr(0x401000))
        if func is not None:
            results = decomp.decompileFunction(func, 30, None)  # 30s timeout
            if results.decompileCompleted():
                c_code = results.getDecompiledFunction().getC()
                print(c_code)
            else:
                print(f"Decompilation failed: {results.getErrorMessage()}")

        # Decompile all functions; functions that fail to decompile are skipped
        for func in fm.getFunctions(True):
            results = decomp.decompileFunction(func, 30, None)
            if results.decompileCompleted():
                code = results.getDecompiledFunction().getC()
                print(f"// Function: {func.getName()}")
                print(code)
                print()
    finally:
        decomp.dispose()
Stringhe e dati
import pyghidra

# The JVM must be running before any ghidra.* package can be imported,
# so start PyGhidra first and import Ghidra classes afterwards.
pyghidra.start()

from ghidra.program.model.data import StringDataType

with pyghidra.open_program("/path/to/binary") as flat_api:
    program = flat_api.getCurrentProgram()
    listing = program.getListing()

    # Find all defined strings: keep defined data whose data type name
    # contains "string" (case-insensitive)
    for data in listing.getDefinedData(True):
        dt = data.getDataType()
        if "string" in dt.getName().lower():
            value = data.getValue()
            addr = data.getAddress()
            print(f"0x{addr.getOffset():x}: {value}")

    # Search memory for a specific byte sequence, starting at the lowest address
    memory = program.getMemory()
    search_bytes = b"password"
    addr = memory.findBytes(
        program.getMinAddress(),
        search_bytes,
        None,  # mask
        True,  # forward
        None,  # monitor
    )
    if addr is not None:
        print(f"Found 'password' at 0x{addr.getOffset():x}")
Patching
import pyghidra

# The JVM must be running before any ghidra.* package can be imported,
# so start PyGhidra first and import Ghidra classes afterwards.
pyghidra.start()

from ghidra.program.model.mem import MemoryAccessException
from ghidra.util.task import TaskMonitor

with pyghidra.open_program("/path/to/binary") as flat_api:
    program = flat_api.getCurrentProgram()
    memory = program.getMemory()

    # Read bytes. A Python bytearray handed to a Java byte[] parameter is
    # converted to a copy, so Java never fills the Python buffer; ask the
    # flat API for a Java byte[] instead.
    addr = flat_api.toAddr(0x401000)
    original = flat_api.getBytes(addr, 16)
    # Java bytes are signed; mask to 0..255 before building a Python bytes
    print(f"Original bytes: {bytes(b & 0xFF for b in original).hex()}")

    # Write/patch bytes (within a transaction)
    tx_id = program.startTransaction("Patch NOP")
    committed = False
    try:
        # NOP out an instruction (x86)
        memory.setBytes(addr, bytes([0x90, 0x90, 0x90, 0x90, 0x90]))
        committed = True
    except Exception as e:
        print(f"Patch failed: {e}")
    finally:
        # True = commit, False = rollback
        program.endTransaction(tx_id, committed)

    # Save the modified program only when the patch was actually applied
    if committed:
        program.save("Patched binary", TaskMonitor.DUMMY)
Gestione progetti Ghidra
import pyghidra

# The JVM must be running before any Java package can be imported,
# so start PyGhidra first and import Java classes afterwards.
pyghidra.start()

from java.io import File

# Create a new Ghidra project
project_dir = "/tmp/ghidra_projects"
project_name = "my_analysis"

with pyghidra.open_program(
    "/path/to/binary",
    project_location=project_dir,
    project_name=project_name,
    analyze=True,  # Run auto-analysis
) as flat_api:
    program = flat_api.getCurrentProgram()
    # Program is now analyzed and available
    fm = program.getFunctionManager()
    print(f"Functions found: {fm.getFunctionCount()}")
Esecuzione script
import pyghidra

# The JVM must be running before any ghidra.* package can be imported,
# so start PyGhidra first and import Ghidra classes afterwards.
pyghidra.start()

from ghidra.program.model.symbol import SourceType

# Run a Ghidra script on a binary
with pyghidra.open_program("/path/to/binary") as flat_api:
    # Using flat API methods (same as GhidraScript)
    program = flat_api.getCurrentProgram()

    # Find bytes pattern; the byte-array search takes an explicit start address
    addr = flat_api.find(program.getMinAddress(), b"\x48\x89\xe5")  # mov rbp, rsp
    if addr is not None:
        print(f"Pattern found at: 0x{addr.getOffset():x}")

    # Get instruction at address
    insn = flat_api.getInstructionAt(flat_api.toAddr(0x401000))
    if insn is not None:
        print(f"Instruction: {insn}")
        print(f"Mnemonic: {insn.getMnemonicString()}")

    # Create a label/symbol inside a transaction
    tx_id = program.startTransaction("Add label")
    committed = False
    try:
        sym_table = program.getSymbolTable()
        sym_table.createLabel(
            flat_api.toAddr(0x401000),
            "my_interesting_function",
            SourceType.USER_DEFINED,
        )
        committed = True
    except Exception as e:
        # Report the failure instead of rolling back silently
        print(f"Failed to add label: {e}")
    finally:
        # True = commit, False = rollback
        program.endTransaction(tx_id, committed)
Analisi batch
import pyghidra
import os
pyghidra.start()
def analyze_binary(filepath):
    """Analyze a single binary and extract key info.

    Args:
        filepath: Path of the binary, passed to ``pyghidra.open_program``.

    Returns:
        A dict with the keys ``"name"``, ``"arch"``, ``"functions"``
        (function count), ``"imports"`` (names of external symbols) and
        ``"exports"`` (names of functions at exported entry points).
    """
    with pyghidra.open_program(filepath) as flat_api:
        program = flat_api.getCurrentProgram()
        fm = program.getFunctionManager()
        sym_table = program.getSymbolTable()

        info = {
            "name": program.getName(),
            "arch": str(program.getLanguageID()),
            "functions": fm.getFunctionCount(),
            # Imports: every external symbol known to the symbol table
            "imports": [sym.getName() for sym in sym_table.getExternalSymbols()],
            "exports": [],
        }

        # Exports: only functions whose entry point is marked as an external
        # entry point; listing every internal function would not be the
        # export table.
        for func in fm.getFunctions(True):
            if func.isExternal() or func.isThunk():
                continue
            if sym_table.isExternalEntryPoint(func.getEntryPoint()):
                info["exports"].append(func.getName())

        return info
# Run analyze_binary over each entry of the sample directory and print a
# one-line summary; a failure on one entry is reported and the loop goes on
samples_dir = "/path/to/samples"
for entry in os.listdir(samples_dir):
    target = os.path.join(samples_dir, entry)
    try:
        summary = analyze_binary(target)
        import_total = len(summary["imports"])
        print(f"{summary['name']}: {summary['functions']} functions, "
              f"{import_total} imports")
    except Exception as e:
        print(f"Failed to analyze {entry}: {e}")