Ghidra Plugins Cheat Sheet
Overview
Ghidra plugins extend the functionality of the NSA's Ghidra reverse engineering framework. This comprehensive guide covers essential plugins including BinExport, GhidraBridge, Ghidra2Frida, and many others that enhance collaborative analysis, integration with other tools, and advanced reverse engineering capabilities.
💡 Key Benefits: Enhanced collaboration, tool integration, automated analysis, improved workflow efficiency, and extended functionality beyond core Ghidra features.
Essential Plugin Categories
Collaboration and Export Plugins
BinExport Plugin
# Installation
git clone https://github.com/google/binexport.git
cd binexport
mkdir build && cd build
cmake ..
make -j"$(nproc)"
# Install to Ghidra
# (expansions are quoted so an install path containing spaces is not word-split)
cp BinExport.jar "$GHIDRA_INSTALL_DIR/Extensions/Ghidra/"
# Usage in Ghidra
# File -> Export Program -> BinExport (v2) for BinDiff
# File -> Export Program -> BinExport (v2) for BinNavi
# Command line export
"$GHIDRA_INSTALL_DIR/support/analyzeHeadless" \
    /path/to/project ProjectName \
    -import /path/to/binary \
    -postScript BinExportScript.java \
    -scriptPath /path/to/scripts
# Export formats
# .BinExport - For BinDiff comparison
# .BinExport2 - Enhanced format with more metadata
# SQL export - For BinNavi database import
GhidraBridge
# Installation
pip install ghidra-bridge
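# Server setup in Ghidra
# First copy the bridge server scripts into a Ghidra script directory:
#   python -m ghidra_bridge.install_server ~/ghidra_scripts
# Then run the GhidraBridge server script from the Ghidra Script Manager:
# Window -> Script Manager -> GhidraBridge -> ghidra_bridge_server.py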
# Python client usage
import ghidra_bridge

# Connect to Ghidra.
# namespace=globals() asks the bridge to publish Ghidra's script API
# (getCurrentProgram() and friends) as plain names in this module, so the
# API is called directly rather than as a method on the bridge object `b`.
b = ghidra_bridge.GhidraBridge(namespace=globals())

# Access Ghidra API from Python
current_program = getCurrentProgram()
print(f"Program: {current_program.getName()}")

# Get function manager
function_manager = current_program.getFunctionManager()
functions = function_manager.getFunctions(True)

# Iterate through functions
for func in functions:
    print(f"Function: {func.getName()} at {func.getEntryPoint()}")

    # Get function body
    body = func.getBody()
    print(f" Size: {body.getNumAddresses()} addresses")

    # Get calling functions (None is passed where the API takes a task monitor)
    callers = func.getCallingFunctions(None)
    print(f" Callers: {len(list(callers))}")
# Advanced analysis with external tools
import networkx as nx
def build_call_graph():
    """Return a NetworkX directed graph of caller -> callee function names.

    Every function known to the module-level ``function_manager`` becomes a
    node; one edge is added per calling function reported for it.
    """
    graph = nx.DiGraph()
    for callee in function_manager.getFunctions(True):
        callee_name = callee.getName()
        graph.add_node(callee_name)
        # One edge per caller, pointing at the function being visited.
        graph.add_edges_from(
            (caller.getName(), callee_name)
            for caller in callee.getCallingFunctions(None)
        )
    return graph
# Export analysis results
def export_function_info():
    """Write one record per function to ``ghidra_functions.json``.

    Each record holds the function's name, entry point (as a string), body
    size in addresses and prototype string. The list of records is also
    returned to the caller.
    """
    import json

    functions_data = [
        {
            'name': func.getName(),
            'address': str(func.getEntryPoint()),
            'size': func.getBody().getNumAddresses(),
            'signature': func.getSignature().getPrototypeString()
        }
        for func in function_manager.getFunctions(True)
    ]

    with open('ghidra_functions.json', 'w') as out:
        json.dump(functions_data, out, indent=2)

    return functions_data
# Machine learning integration
def extract_features_for_ml():
    """Build one feature dictionary per function for downstream ML use.

    Only the name, size, caller count ('complexity') and instruction count
    are populated; 'has_loops', 'string_refs' and 'api_calls' keep their
    placeholder values because that analysis is not implemented here.
    """
    features = []
    for func in function_manager.getFunctions(True):
        caller_count = len(list(func.getCallingFunctions(None)))
        feature_vector = {
            'name': func.getName(),
            'size': func.getBody().getNumAddresses(),
            'complexity': caller_count,
            'has_loops': False,  # Would need more complex analysis
            'instruction_count': 0,
            'string_refs': 0,
            'api_calls': 0
        }

        # Walk the function body once and count its instructions.
        # API-call and string-reference detection would hook in here.
        instructions = current_program.getListing().getInstructions(func.getBody(), True)
        feature_vector['instruction_count'] += sum(1 for _ in instructions)

        features.append(feature_vector)
    return features
Integration and Automation Plugins
Ghidra2Frida
# Installation
# Download from: https://github.com/federicodotta/Ghidra2Frida
# Place in Ghidra Extensions directory
# Usage in Ghidra Script Manager
# Generate Frida hooks for functions
# Example generated Frida script
# Frida hook script kept as a Python string so it can be written to disk.
# The strcpy hook reads its source string with NativePointer.readUtf8String()
# instead of the legacy Memory.readUtf8String(ptr) helper.
frida_script = """
// Auto-generated Frida script from Ghidra
// Hook function at 0x401000
Interceptor.attach(ptr("0x401000"), {
onEnter: function(args) {
console.log("[+] Entering function_name");
console.log(" arg0: " + args[0]);
console.log(" arg1: " + args[1]);
// Log stack trace
console.log("Stack trace:");
console.log(Thread.backtrace(this.context, Backtracer.ACCURATE)
.map(DebugSymbol.fromAddress).join("\\n"));
},
onLeave: function(retval) {
console.log("[+] Leaving function_name");
console.log(" Return value: " + retval);
}
});
// Hook string functions
var strcpy = Module.findExportByName(null, "strcpy");
if (strcpy) {
Interceptor.attach(strcpy, {
onEnter: function(args) {
console.log("[strcpy] dest: " + args[0] + ", src: " + args[1].readUtf8String());
}
});
}
// Memory scanning for patterns
function scanForPattern(pattern) {
var ranges = Process.enumerateRanges('r--');
ranges.forEach(function(range) {
Memory.scan(range.base, range.size, pattern, {
onMatch: function(address, size) {
console.log("[+] Pattern found at: " + address);
},
onComplete: function() {
console.log("[+] Scan complete for range: " + range.base);
}
});
});
}
// Usage
scanForPattern("41 41 41 41"); // Search for AAAA pattern
"""
# Save and use with Frida (explicit encoding keeps the output independent of
# the platform's default locale)
with open('ghidra_hooks.js', 'w', encoding='utf-8') as f:
    f.write(frida_script)
# Run with Frida
# frida -l ghidra_hooks.js -f target_binary
Ghidra Jupyter Integration
# Installation and setup
pip install jupyter ghidra-bridge matplotlib pandas
# Jupyter notebook cell
import ghidra_bridge
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Connect to Ghidra
b = ghidra_bridge.GhidraBridge(namespace=globals())
# Function size analysis
def analyze_function_sizes():
    """Analyze and visualize function sizes.

    Collects the name and body size (number of addresses) of every function
    in the current program, prints summary statistics and shows a 2x2 figure
    (histogram, ten largest functions, box plot, cumulative distribution).

    Returns:
        A DataFrame with columns 'function' and 'size'. When the program has
        no functions the empty DataFrame is returned before any statistics
        or plots are produced.
    """
    function_manager = getCurrentProgram().getFunctionManager()

    names = []
    sizes = []
    for func in function_manager.getFunctions(True):
        names.append(func.getName())
        sizes.append(func.getBody().getNumAddresses())

    # Create DataFrame
    df = pd.DataFrame({
        'function': names,
        'size': sizes
    })

    # Statistical analysis
    print(f"Total functions: {len(df)}")
    if df.empty:
        # idxmax() raises on an empty Series, so stop here; this mirrors the
        # empty checks in the cross-reference and string analyses.
        return df
    print(f"Average size: {df['size'].mean():.2f}")
    print(f"Median size: {df['size'].median():.2f}")
    print(f"Largest function: {df.loc[df['size'].idxmax(), 'function']} ({df['size'].max()} bytes)")

    # Visualization
    plt.figure(figsize=(12, 8))

    # Histogram
    plt.subplot(2, 2, 1)
    plt.hist(df['size'], bins=50, alpha=0.7)
    plt.xlabel('Function Size (bytes)')
    plt.ylabel('Frequency')
    plt.title('Function Size Distribution')

    # Top 10 largest functions
    plt.subplot(2, 2, 2)
    top_10 = df.nlargest(10, 'size')
    plt.barh(range(len(top_10)), top_10['size'])
    plt.yticks(range(len(top_10)), top_10['function'])
    plt.xlabel('Size (bytes)')
    plt.title('Top 10 Largest Functions')

    # Box plot
    plt.subplot(2, 2, 3)
    plt.boxplot(df['size'])
    plt.ylabel('Size (bytes)')
    plt.title('Function Size Box Plot')

    # Cumulative distribution: fraction of functions at or below each size
    plt.subplot(2, 2, 4)
    sorted_sizes = np.sort(df['size'])
    cumulative = np.arange(1, len(sorted_sizes) + 1) / len(sorted_sizes)
    plt.plot(sorted_sizes, cumulative)
    plt.xlabel('Function Size (bytes)')
    plt.ylabel('Cumulative Probability')
    plt.title('Cumulative Distribution')

    plt.tight_layout()
    plt.show()
    return df
# Cross-reference analysis
def analyze_cross_references():
    """Tabulate and chart which functions reference which.

    For every function, each reference to its entry point whose source
    address lies inside some function yields one row (source function,
    target function, reference type). A non-empty table is charted as a bar
    plot of the ten most referenced functions and a pie of reference types.

    Returns:
        The DataFrame of reference rows (empty when nothing was found).
    """
    reference_manager = getCurrentProgram().getReferenceManager()
    function_manager = getCurrentProgram().getFunctionManager()

    # Build reference graph
    ref_data = []
    for target in function_manager.getFunctions(True):
        # References TO this function's entry point
        for ref in reference_manager.getReferencesTo(target.getEntryPoint()):
            source = function_manager.getFunctionContaining(ref.getFromAddress())
            if not source:
                # The referencing address is not inside any function.
                continue
            ref_data.append({
                'from_function': source.getName(),
                'to_function': target.getName(),
                'reference_type': str(ref.getReferenceType())
            })

    ref_df = pd.DataFrame(ref_data)
    if ref_df.empty:
        return ref_df

    plt.figure(figsize=(12, 6))

    # Most referenced functions
    plt.subplot(1, 2, 1)
    most_referenced = ref_df['to_function'].value_counts().head(10)
    most_referenced.plot(kind='bar')
    plt.title('Most Referenced Functions')
    plt.xlabel('Function')
    plt.ylabel('Reference Count')
    plt.xticks(rotation=45)

    # Reference type distribution
    plt.subplot(1, 2, 2)
    ref_df['reference_type'].value_counts().plot(kind='pie', autopct='%1.1f%%')
    plt.title('Reference Type Distribution')

    plt.tight_layout()
    plt.show()
    return ref_df
# String analysis
def analyze_strings():
    """Analyze strings in the binary.

    Collects every defined string longer than three characters, charts the
    length and type distributions when any were found, and prints up to ten
    strings that match a credential, URL, e-mail or IPv4 pattern.

    Returns:
        A DataFrame with columns 'address', 'string', 'length' and 'type'
        (empty, without columns, when no string qualified).
    """
    import re

    listing = getCurrentProgram().getListing()
    strings_data = []

    # Get all defined strings
    for data in listing.getDefinedData(True):
        if not data.hasStringValue():
            continue
        string_value = data.getValue()
        if string_value and len(str(string_value)) > 3:
            text = str(string_value)
            strings_data.append({
                'address': str(data.getAddress()),
                'string': text,
                'length': len(text),
                'type': str(data.getDataType())
            })

    # Create DataFrame
    strings_df = pd.DataFrame(strings_data)

    if not strings_df.empty:
        # String length analysis
        plt.figure(figsize=(12, 8))
        plt.subplot(2, 2, 1)
        plt.hist(strings_df['length'], bins=30, alpha=0.7)
        plt.xlabel('String Length')
        plt.ylabel('Frequency')
        plt.title('String Length Distribution')

        # Longest strings (labels truncated to 30 characters)
        plt.subplot(2, 2, 2)
        longest = strings_df.nlargest(10, 'length')
        plt.barh(range(len(longest)), longest['length'])
        plt.yticks(range(len(longest)), [s[:30] + '...' if len(s) > 30 else s for s in longest['string']])
        plt.xlabel('Length')
        plt.title('Longest Strings')

        # String type distribution
        plt.subplot(2, 2, 3)
        strings_df['type'].value_counts().plot(kind='pie', autopct='%1.1f%%')
        plt.title('String Type Distribution')

        plt.tight_layout()
        plt.show()

    # Interesting strings (potential passwords, URLs, etc.)
    interesting_patterns = [
        r'password', r'passwd', r'pwd',
        r'http[s]?://', r'ftp://',
        # E-mail; the TLD class is [A-Za-z] so a literal '|' is not accepted
        r'[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}',
        r'[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}',  # IP address
    ]
    # Compile once; IGNORECASE makes lowering each string unnecessary.
    compiled_patterns = [re.compile(p, re.IGNORECASE) for p in interesting_patterns]

    interesting_strings = [
        row
        for _, row in strings_df.iterrows()
        if any(pattern.search(row['string']) for pattern in compiled_patterns)
    ]

    if interesting_strings:
        print("Interesting strings found:")
        for string_info in interesting_strings[:10]:
            print(f" {string_info['address']}: {string_info['string'][:50]}")

    return strings_df
# Run analyses
# Each call analyzes the program returned by getCurrentProgram() and keeps
# the DataFrame it returns so the results can be inspected further.
function_df = analyze_function_sizes()
ref_df = analyze_cross_references()
strings_df = analyze_strings()
Advanced Analysis Plugins
Ghidra Decompiler Extensions
// Custom decompiler plugin example
// Place in Ghidra/Features/Decompiler/src/main/java/
import ghidra.app.decompiler.*;
import ghidra.program.model.listing.*;
import ghidra.program.model.pcode.*;
/**
 * Example analysis pass that decompiles a function and inspects the
 * resulting high-level representation for control flow, data flow and a few
 * simple security-relevant patterns. Findings are printed to standard output.
 */
public class CustomDecompilerAnalysis {

    /** Library routines whose calls are reported by {@link #detectSecurityPatterns}. */
    private static final String[] DANGEROUS_FUNCTIONS = {
        "strcpy", "strcat", "sprintf", "gets", "scanf"
    };

    /**
     * Decompiles {@code function} (30 second timeout) and runs every analysis
     * on the result. Nothing is reported when decompilation does not complete
     * or yields no high function.
     *
     * @param function   the function to analyze
     * @param decompiler an already opened decompiler interface
     */
    public void analyzeFunction(Function function, DecompInterface decompiler) {
        // Get high-level representation
        DecompileResults results = decompiler.decompileFunction(function, 30, null);
        if (!results.decompileCompleted()) {
            return;
        }
        HighFunction highFunction = results.getHighFunction();
        if (highFunction == null) {
            return;
        }

        // Analyze control flow
        analyzeControlFlow(highFunction);
        // Analyze data flow
        analyzeDataFlow(highFunction);
        // Detect patterns
        detectSecurityPatterns(highFunction);
    }

    /** Prints every basic block together with the start of each successor. */
    private void analyzeControlFlow(HighFunction highFunction) {
        // getBasicBlocks() returns a list, so it is iterated with for-each.
        for (PcodeBlockBasic block : highFunction.getBasicBlocks()) {
            System.out.println("Block: " + block.getStart() + " to " + block.getStop());

            // Get successors
            for (int i = 0; i < block.getOutSize(); i++) {
                PcodeBlock successor = block.getOut(i);
                System.out.println(" Successor: " + successor.getStart());
            }
        }
    }

    /** Prints type, size and the opcode of every use for each local variable. */
    private void analyzeDataFlow(HighFunction highFunction) {
        highFunction.getLocalSymbolMap().getSymbols().forEachRemaining(symbol -> {
            HighVariable variable = symbol.getHighVariable();
            if (variable == null) {
                return;
            }
            System.out.println("Variable: " + symbol.getName());
            System.out.println(" Type: " + variable.getDataType());
            System.out.println(" Size: " + variable.getSize());

            // Def-use information lives on the variable's varnode instances,
            // not on the HighVariable itself.
            for (Varnode instance : variable.getInstances()) {
                instance.getDescendants().forEachRemaining(use ->
                    System.out.println(" Used in: " + use.getOpcode()));
            }
        });
    }

    /**
     * Reports direct calls whose target is one of {@link #DANGEROUS_FUNCTIONS}
     * and forwards COPY/STORE operations to the buffer check.
     */
    private void detectSecurityPatterns(HighFunction highFunction) {
        FunctionManager functionManager =
            highFunction.getFunction().getProgram().getFunctionManager();

        // Analyze P-code operations
        highFunction.getPcodeOps().forEachRemaining(op -> {
            int opcode = op.getOpcode();

            if (opcode == PcodeOp.CALL) {
                // Input 0 of a CALL is the call target.
                Varnode target = op.getInput(0);
                if (target != null && target.isAddress()) {
                    Function callee = functionManager.getFunctionAt(target.getAddress());
                    if (callee != null && isDangerousFunction(callee.getName())) {
                        System.out.println("Potential security issue: dangerous function call to "
                            + callee.getName() + " at " + op.getSeqnum().getTarget());
                    }
                }
            }

            // Look for buffer operations
            if (opcode == PcodeOp.COPY || opcode == PcodeOp.STORE) {
                analyzeBufferOperation(op);
            }
        });
    }

    /** Returns true when {@code name} is exactly one of the listed routines. */
    private static boolean isDangerousFunction(String name) {
        for (String dangerous : DANGEROUS_FUNCTIONS) {
            if (dangerous.equals(name)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Simplified buffer-write detection: reports any COPY/STORE that writes a
     * value of non-zero size. No bounds reasoning is performed.
     */
    private void analyzeBufferOperation(PcodeOpAST op) {
        // A STORE has no output varnode; the value written is its third
        // input (index 2). Other operations expose the written value as output.
        Varnode written = (op.getOpcode() == PcodeOp.STORE && op.getNumInputs() > 2)
            ? op.getInput(2)
            : op.getOutput();
        if (written != null && written.getSize() > 0) {
            System.out.println("Buffer operation detected at: " + op.getSeqnum().getTarget());
        }
    }
}
Ghidra Scripting Extensions
# Advanced Ghidra scripting examples
# Crypto detection script
def detect_crypto_constants():
    """Detect cryptographic constants in binary.

    Reads every initialized memory block as a sequence of 32-bit words (in
    whatever byte order ``memory.getInt`` applies) and records each word that
    equals a known hash/cipher constant. Each hit also gets an end-of-line
    comment at its address.

    Returns:
        A list of dicts with keys 'address', 'value' (hex string) and
        'description'.
    """
    # Common crypto constants
    crypto_constants = {
        0x67452301: "MD5 initial value A",
        0xEFCDAB89: "MD5 initial value B",
        0x98BADCFE: "MD5 initial value C",
        0x10325476: "MD5 initial value D",
        0x6A09E667: "SHA-256 initial value H0",
        0xBB67AE85: "SHA-256 initial value H1",
        0x3C6EF372: "SHA-256 initial value H2",
        0xA54FF53A: "SHA-256 initial value H3",
        0x428A2F98: "SHA-256 round constant K0",
        0x71374491: "SHA-256 round constant K1",
        0x9E3779B9: "TEA delta constant",
        0x61C88647: "XTEA delta constant"
    }
    memory = getCurrentProgram().getMemory()
    found_constants = []

    # Search for constants in memory
    for block in memory.getBlocks():
        if not block.isInitialized():
            continue
        block_end = block.getEnd()

        # Step through the block four bytes at a time from its start
        addr = block.getStart()
        while addr.compareTo(block_end) < 0:
            try:
                # getInt() returns a signed 32-bit value; mask it so words
                # with the top bit set (e.g. 0xEFCDAB89) can match the
                # unsigned keys above.
                value = memory.getInt(addr) & 0xFFFFFFFF
                description = crypto_constants.get(value)
                if description is not None:
                    found_constants.append({
                        'address': addr,
                        'value': hex(value),
                        'description': description
                    })
                    # Create comment
                    setEOLComment(addr, description)
                addr = addr.add(4)
            except Exception:
                # Unreadable word (e.g. fewer than four bytes left in the
                # block): advance one byte and keep scanning.
                addr = addr.add(1)

    # Print results
    print(f"Found {len(found_constants)} crypto constants:")
    for const in found_constants:
        print(f" {const['address']}: {const['value']} - {const['description']}")
    return found_constants
# Function similarity analysis
def analyze_function_similarity():
    """Analyze similarity between functions.

    Extracts a feature set for every function, compares each unordered pair
    once and keeps the pairs whose similarity exceeds 0.8. The ten most
    similar pairs are printed.

    Returns:
        A list of dicts (keys 'function1', 'function2', 'similarity',
        'addr1', 'addr2') sorted by descending similarity.
    """
    function_manager = getCurrentProgram().getFunctionManager()
    functions = list(function_manager.getFunctions(True))

    # Pair every function with its own features. Keeping them positional
    # (not in a dict keyed by name) stops two functions that share a name
    # from overwriting each other's features.
    profiles = [(func, extract_function_features(func)) for func in functions]

    # Compare functions
    similarities = []
    for index, (func1, features1) in enumerate(profiles):
        for func2, features2 in profiles[index + 1:]:
            similarity = calculate_similarity(features1, features2)
            if similarity > 0.8:  # High similarity threshold
                similarities.append({
                    'function1': func1.getName(),
                    'function2': func2.getName(),
                    'similarity': similarity,
                    'addr1': func1.getEntryPoint(),
                    'addr2': func2.getEntryPoint()
                })

    # Sort by similarity
    similarities.sort(key=lambda x: x['similarity'], reverse=True)
    print(f"Found {len(similarities)} similar function pairs:")
    for sim in similarities[:10]:  # Top 10
        print(f" {sim['function1']} <-> {sim['function2']}: {sim['similarity']:.3f}")
    return similarities
def extract_function_features(function):
    """Summarize one function as a feature dictionary.

    Fills in the body size, a mnemonic histogram, the number of call
    instructions and the names of resolvable call targets. 'block_count' and
    'string_refs' are present but left at zero.
    """
    body = function.getBody()
    listing = getCurrentProgram().getListing()
    function_manager = getCurrentProgram().getFunctionManager()

    mnemonic_histogram = {}
    called_names = []
    call_total = 0

    for insn in listing.getInstructions(body, True):
        # Count instructions per mnemonic
        mnemonic = insn.getMnemonicString()
        if mnemonic in mnemonic_histogram:
            mnemonic_histogram[mnemonic] += 1
        else:
            mnemonic_histogram[mnemonic] = 1

        if not insn.getFlowType().isCall():
            continue
        call_total += 1

        # Record the name of every call target that is a known function
        for ref in insn.getReferencesFrom():
            if not ref.getReferenceType().isCall():
                continue
            callee = function_manager.getFunctionAt(ref.getToAddress())
            if callee:
                called_names.append(callee.getName())

    return {
        'size': body.getNumAddresses(),
        'block_count': 0,
        'call_count': call_total,
        'instruction_types': mnemonic_histogram,
        'string_refs': 0,
        'api_calls': called_names
    }
def calculate_similarity(features1, features2):
    """Calculate similarity between two feature sets.

    The score is the cosine similarity of the two 'instruction_types'
    histograms (mnemonic -> count).

    Args:
        features1: Feature dict containing an 'instruction_types' mapping.
        features2: Feature dict containing an 'instruction_types' mapping.

    Returns:
        A float in [0.0, 1.0]; 0.0 when either histogram is empty.
    """
    # Imported locally because the snippet has no module-level `import math`.
    import math

    types1 = features1['instruction_types']
    types2 = features2['instruction_types']

    if not types1 and not types2:
        return 0.0

    # Only mnemonics present in both histograms contribute to the dot product.
    dot_product = sum(count * types2[name] for name, count in types1.items() if name in types2)
    norm1 = sum(count * count for count in types1.values())
    norm2 = sum(count * count for count in types2.values())

    if norm1 == 0 or norm2 == 0:
        return 0.0
    return dot_product / (math.sqrt(norm1) * math.sqrt(norm2))
# Automated vulnerability detection
def detect_vulnerabilities():
    """Detect potential vulnerabilities in code.

    Scans every call instruction in every function and records calls whose
    target is a known-dangerous routine, then runs the format-string and
    integer-overflow detectors and prints a report.

    Returns:
        A list of finding dicts with keys 'type', 'function', 'address',
        'dangerous_function', 'description' and 'severity'.
    """
    vulnerabilities = []

    # Dangerous function patterns
    dangerous_functions = {
        'strcpy': 'Buffer overflow risk - no bounds checking',
        'strcat': 'Buffer overflow risk - no bounds checking',
        'sprintf': 'Buffer overflow risk - no bounds checking',
        'gets': 'Buffer overflow risk - reads unlimited input',
        'scanf': 'Buffer overflow risk with %s format',
        'system': 'Command injection risk',
        'exec': 'Command injection risk',
        'eval': 'Code injection risk'
    }
    # exec*() variants that are reported under the 'exec' entry.
    exec_family = ('execl', 'execlp', 'execle', 'execv', 'execvp', 'execvpe', 'execve')

    function_manager = getCurrentProgram().getFunctionManager()
    listing = getCurrentProgram().getListing()

    # Check for dangerous function calls
    for func in function_manager.getFunctions(True):
        for instruction in listing.getInstructions(func.getBody(), True):
            if not instruction.getFlowType().isCall():
                continue
            for ref in instruction.getReferencesFrom():
                if not ref.getReferenceType().isCall():
                    continue
                target_func = function_manager.getFunctionAt(ref.getToAddress())
                if not target_func:
                    continue
                func_name = target_func.getName()

                # Match the whole name (ignoring case and leading
                # underscores). Substring matching reported bounded or
                # unrelated routines such as fgets, snprintf or
                # "filesystem" as dangerous.
                normalized = func_name.lower().lstrip('_')
                dangerous_func = 'exec' if normalized in exec_family else normalized
                description = dangerous_functions.get(dangerous_func)
                if description is None:
                    continue

                vulnerabilities.append({
                    'type': 'dangerous_function_call',
                    'function': func.getName(),
                    'address': instruction.getAddress(),
                    'dangerous_function': func_name,
                    'description': description,
                    'severity': 'high' if dangerous_func in ['gets', 'system'] else 'medium'
                })

    # Check for format string vulnerabilities
    detect_format_string_vulns(vulnerabilities)
    # Check for integer overflow patterns
    detect_integer_overflow_patterns(vulnerabilities)

    # Print results
    print(f"Found {len(vulnerabilities)} potential vulnerabilities:")
    for vuln in vulnerabilities:
        print(f" [{vuln['severity'].upper()}] {vuln['type']} in {vuln['function']}")
        print(f" Address: {vuln['address']}")
        print(f" Description: {vuln['description']}")
    return vulnerabilities
def detect_format_string_vulns(vulnerabilities):
    """Placeholder for format-string detection.

    Walks every function but records nothing: ``vulnerabilities`` is left
    untouched. A real detector would need data-flow analysis of the format
    argument passed to printf-family calls.
    """
    # printf-family routines a real implementation would look for
    printf_functions = ['printf', 'fprintf', 'sprintf', 'snprintf', 'vprintf']
    for _func in getCurrentProgram().getFunctionManager().getFunctions(True):
        # Simplified detection - no per-function analysis is done yet
        continue
def detect_integer_overflow_patterns(vulnerabilities):
    """Placeholder for integer-overflow detection.

    Does nothing and leaves ``vulnerabilities`` unchanged; arithmetic without
    bounds checking is what a real implementation would look for.
    """
    return None
# Run analysis scripts
# Run the three detectors in turn; each prints its own report and returns
# its list of findings, which is kept here for later use.
crypto_constants = detect_crypto_constants()
similar_functions = analyze_function_similarity()
vulnerabilities = detect_vulnerabilities()
Utility and Helper Plugins
Ghidra Batch Processing
# Batch processing utilities for Ghidra
import os
import json
import subprocess
from pathlib import Path
class GhidraBatchProcessor:
    """Run Ghidra's ``analyzeHeadless`` launcher over a batch of binaries."""

    def __init__(self, ghidra_path, project_path):
        """Remember the Ghidra install directory and the project directory.

        Args:
            ghidra_path: Ghidra installation directory (str or Path).
            project_path: Directory passed to analyzeHeadless as the project
                location (str or Path).
        """
        self.ghidra_path = Path(ghidra_path)
        self.project_path = Path(project_path)
        self.analyze_headless = self.ghidra_path / "support" / "analyzeHeadless"

    def batch_analyze(self, binary_paths, scripts=None, output_dir=None):
        """Batch analyze multiple binaries.

        Each binary is imported into its own project named
        ``batch_<binary stem>`` with a 300 second timeout.

        Args:
            binary_paths: Iterable of binary paths (str or Path).
            scripts: Optional post-scripts to run, given as names or paths.
            output_dir: Directory for the JSON result files (str or Path);
                defaults to ``./batch_analysis_results``.

        Returns:
            One dict per binary with 'binary', 'project' and 'success', plus
            'stdout'/'stderr' for runs that finished or 'error' for runs that
            timed out or could not be launched.
        """
        # Accept str as well as Path, and create missing parent directories.
        output_dir = Path("./batch_analysis_results" if output_dir is None else output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        script_args = self._script_arguments(scripts, output_dir)

        results = []
        for binary_path in binary_paths:
            binary_path = Path(binary_path)
            print(f"Analyzing: {binary_path.name}")

            # Create project for this binary
            project_name = f"batch_{binary_path.stem}"

            # Build command
            cmd = [
                str(self.analyze_headless),
                str(self.project_path),
                project_name,
                "-import", str(binary_path),
                "-overwrite"
            ]
            cmd.extend(script_args)

            analysis_result = self._run_analysis(cmd, binary_path, project_name)
            results.append(analysis_result)

            # Save individual result
            result_file = output_dir / f"{binary_path.stem}_result.json"
            with open(result_file, 'w') as f:
                json.dump(analysis_result, f, indent=2)

        # Save batch results
        batch_result_file = output_dir / "batch_results.json"
        with open(batch_result_file, 'w') as f:
            json.dump(results, f, indent=2)
        return results

    @staticmethod
    def _script_arguments(scripts, output_dir):
        """Build the -postScript/-scriptPath arguments for ``scripts``."""
        if not scripts:
            return []

        args = []
        search_dirs = []
        for script in scripts:
            script = Path(script)
            # The script is passed by name; the directory it lives in goes
            # on the script search path so the name can be resolved.
            args.extend(["-postScript", script.name])
            parent = str(script.parent.resolve())
            if parent not in search_dirs:
                search_dirs.append(parent)

        # The output directory stays on the search path, as it always was.
        fallback = str(output_dir.resolve())
        if fallback not in search_dirs:
            search_dirs.append(fallback)

        args.extend(["-scriptPath", ";".join(search_dirs)])
        return args

    @staticmethod
    def _run_analysis(cmd, binary_path, project_name):
        """Run one analyzeHeadless command and describe the outcome as a dict."""
        try:
            # Run analysis
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
        except subprocess.TimeoutExpired:
            print(f"Timeout analyzing {binary_path.name}")
            return {
                'binary': str(binary_path),
                'project': project_name,
                'success': False,
                'error': 'timeout'
            }
        except OSError as exc:
            # The launcher is missing or not executable: record the failure
            # and let the rest of the batch continue.
            print(f"Failed to launch analysis for {binary_path.name}: {exc}")
            return {
                'binary': str(binary_path),
                'project': project_name,
                'success': False,
                'error': str(exc)
            }
        return {
            'binary': str(binary_path),
            'project': project_name,
            'success': result.returncode == 0,
            'stdout': result.stdout,
            'stderr': result.stderr
        }

    def export_all_functions(self, binary_path, output_format='json'):
        """Export all functions from a binary.

        Writes a Ghidra script named ``export_functions.py`` to the current
        directory and runs it as a post-script through ``batch_analyze``.

        Args:
            binary_path: Binary to analyze (str or Path).
            output_format: Used only as the extension of the output file
                name; the generated script always writes JSON.

        Returns:
            The result list returned by ``batch_analyze``.
        """
        import textwrap

        binary_path = Path(binary_path)
        output_name = f"{binary_path.stem}_functions.{output_format}"

        # The generated script avoids f-strings so it also parses under
        # Python 2 syntax; the file name is embedded with !r so quotes or
        # backslashes in it cannot break the script.
        script_content = textwrap.dedent(f"""\
            # Export functions script
            import json

            def export_functions():
                program = getCurrentProgram()
                function_manager = program.getFunctionManager()
                functions_data = []
                for func in function_manager.getFunctions(True):
                    func_data = {{
                        'name': func.getName(),
                        'address': str(func.getEntryPoint()),
                        'size': func.getBody().getNumAddresses(),
                        'signature': func.getSignature().getPrototypeString() if func.getSignature() else None,
                        'calling_convention': str(func.getCallingConvention()) if func.getCallingConvention() else None,
                        'parameter_count': func.getParameterCount(),
                        'local_variable_count': len(func.getLocalVariables()),
                        'is_thunk': func.isThunk(),
                        'is_external': func.isExternal()
                    }}
                    # Get function calls
                    calls = []
                    body = func.getBody()
                    listing = program.getListing()
                    instructions = listing.getInstructions(body, True)
                    for instruction in instructions:
                        if instruction.getFlowType().isCall():
                            refs = instruction.getReferencesFrom()
                            for ref in refs:
                                if ref.getReferenceType().isCall():
                                    target_addr = ref.getToAddress()
                                    target_func = function_manager.getFunctionAt(target_addr)
                                    if target_func:
                                        calls.append(target_func.getName())
                    func_data['calls'] = calls
                    functions_data.append(func_data)
                # Save to file
                output_file = {output_name!r}
                with open(output_file, 'w') as f:
                    json.dump(functions_data, f, indent=2)
                print("Exported %d functions to %s" % (len(functions_data), output_file))

            export_functions()
            """)

        # Save script
        script_file = Path("export_functions.py")
        with open(script_file, 'w', encoding='utf-8') as f:
            f.write(script_content)

        # Run analysis with script
        return self.batch_analyze([binary_path], scripts=[str(script_file)])
# Usage example
def run_batch_analysis():
    """Demonstrate a batch run over three system binaries.

    Builds a GhidraBatchProcessor for a Ghidra install at /opt/ghidra,
    analyzes /bin/ls, /bin/cat and /bin/echo with three post-scripts, prints
    how many runs succeeded and returns the per-binary results.
    """
    # Setup (adjust the Ghidra path for your machine)
    processor = GhidraBatchProcessor("/opt/ghidra", "/tmp/ghidra_projects")

    # Binaries to analyze
    targets = ["/bin/ls", "/bin/cat", "/bin/echo"]

    # Custom analysis scripts
    post_scripts = ["export_functions.py", "detect_crypto.py", "analyze_strings.py"]

    # Run batch analysis
    results = processor.batch_analyze(targets, scripts=post_scripts)

    # Print summary
    successful = len([entry for entry in results if entry['success']])
    print(f"Batch analysis complete: {successful}/{len(results)} successful")
    return results
# Ghidra project management utilities
class GhidraProjectManager:
    """Thin wrapper around ``analyzeHeadless`` for common project operations."""

    def __init__(self, ghidra_path):
        """Remember the Ghidra installation directory (str or Path)."""
        self.ghidra_path = Path(ghidra_path)

    def _run_headless(self, project_path, project_name, *args):
        """Run analyzeHeadless with ``args``; return True on exit status 0.

        A launcher that cannot be started (missing or not executable) is
        reported as False instead of raising.
        """
        cmd = [
            str(self.ghidra_path / "support" / "analyzeHeadless"),
            str(project_path),
            project_name,
            *args
        ]
        try:
            result = subprocess.run(cmd, capture_output=True, text=True)
        except OSError:
            return False
        return result.returncode == 0

    def create_project(self, project_path, project_name):
        """Create new Ghidra project.

        Returns:
            True when analyzeHeadless exits with status 0.
        """
        # NOTE(review): "-create" may not be an option analyzeHeadless
        # accepts - confirm against the launcher's README before relying on
        # this; importing a binary may be the only way to create a project.
        return self._run_headless(project_path, project_name, "-create")

    def import_binary(self, project_path, project_name, binary_path, analyze=True):
        """Import binary into project.

        Args:
            project_path: Project location directory.
            project_name: Name of the project.
            binary_path: Binary to import.
            analyze: When False, "-noanalysis" is passed to the launcher.

        Returns:
            True when analyzeHeadless exits with status 0.
        """
        args = ["-import", str(binary_path)]
        if not analyze:
            args.append("-noanalysis")
        return self._run_headless(project_path, project_name, *args)

    def export_project(self, project_path, project_name, export_path, format_type="xml"):
        """Export project data.

        Writes a Ghidra script named ``export_project.py`` to the current
        directory and runs it over the programs already in the project. The
        script exports the current program with Ghidra's XmlExporter to
        ``export_path``.

        Args:
            project_path: Project location directory.
            project_name: Name of the project.
            export_path: Destination file for the export.
            format_type: Accepted for interface compatibility; only XML
                export is implemented.

        Returns:
            True when analyzeHeadless exits with status 0.
        """
        import textwrap

        # The destination is embedded with !r so quotes or backslashes in
        # the path cannot break the generated script.
        export_script = textwrap.dedent(f"""\
            # Export project script
            from java.io import File
            from ghidra.app.util.exporter import XmlExporter

            def export_project_data():
                program = getCurrentProgram()
                # Export program as XML
                exporter = XmlExporter()
                export_file = {str(export_path)!r}
                # Perform export; `monitor` is provided by the script runtime
                success = exporter.export(File(export_file), program, None, monitor)
                if success:
                    print("Project exported to %s" % export_file)
                else:
                    print("Export failed")

            export_project_data()
            """)

        # Save and run export script
        script_file = Path("export_project.py")
        with open(script_file, 'w', encoding='utf-8') as f:
            f.write(export_script)

        # "-process" makes the launcher run the script on programs already
        # in the project; without "-import" or "-process" the post-script
        # never runs.
        # NOTE(review): "-process" with no file name is expected to visit
        # every program in the project, each writing to the same export
        # file - confirm that is acceptable for multi-program projects.
        return self._run_headless(
            project_path,
            project_name,
            "-process",
            "-noanalysis",
            "-readOnly",
            "-scriptPath", str(script_file.resolve().parent),
            "-postScript", script_file.name
        )
# Run examples
if __name__ == "__main__":
    # Batch analysis example
    batch_results = run_batch_analysis()

    # Project management example: create a project, import a binary into
    # it, then export it as XML.
    ghidra_path = "/opt/ghidra"
    manager = GhidraProjectManager(ghidra_path)
    manager.create_project("/tmp/test_project", "TestProject")
    manager.import_binary("/tmp/test_project", "TestProject", "/bin/ls")
    manager.export_project("/tmp/test_project", "TestProject", "/tmp/exported_project.xml")
Plugin Development
Creating Custom Plugins
// Custom Ghidra plugin template
// Place in Ghidra/Features/Base/src/main/java/
import java.util.ArrayList;
import java.util.List;

import docking.ActionContext;
import docking.action.DockingAction;
import docking.action.MenuData;
import ghidra.app.plugin.PluginCategoryNames;
import ghidra.app.plugin.ProgramPlugin;
import ghidra.framework.plugintool.*;
import ghidra.framework.plugintool.util.PluginStatus;
import ghidra.program.model.address.Address;
import ghidra.program.model.listing.Function;
import ghidra.program.model.listing.FunctionIterator;
import ghidra.program.model.listing.FunctionManager;
import ghidra.program.model.listing.Program;
@PluginInfo(
    status = PluginStatus.STABLE,
    packageName = "CustomAnalysis",
    category = PluginCategoryNames.ANALYSIS,
    shortDescription = "Custom analysis plugin",
    description = "Performs custom binary analysis tasks"
)
public class CustomAnalysisPlugin extends ProgramPlugin {

    /**
     * Creates the plugin and registers its menu action with the tool.
     *
     * @param tool the tool this plugin is added to
     */
    public CustomAnalysisPlugin(PluginTool tool) {
        super(tool, true, true);
        setupActions();
    }

    /** Registers the "Analysis > Custom Analysis" menu action. */
    private void setupActions() {
        tool.addAction(createAnalysisAction());
    }

    /** Builds the enabled menu action that triggers the custom analysis. */
    private DockingAction createAnalysisAction() {
        DockingAction action = new DockingAction("Custom Analysis", getName()) {
            @Override
            public void actionPerformed(ActionContext context) {
                performCustomAnalysis();
            }
        };
        String[] menuPath = {"Analysis", "Custom Analysis"};
        action.setMenuBarData(new MenuData(menuPath, "CustomAnalysis"));
        action.setDescription("Run custom analysis");
        action.setEnabled(true);
        return action;
    }

    /** Runs the analyzer on the current program, if any, and shows the results. */
    private void performCustomAnalysis() {
        Program current = getCurrentProgram();
        if (current != null) {
            CustomAnalyzer analyzer = new CustomAnalyzer(current);
            analyzer.analyze();
            displayResults(analyzer.getResults());
        }
    }

    /** Shows the given results in a dialog owned by the tool. */
    private void displayResults(AnalysisResults results) {
        tool.showDialog(new CustomResultsDialog(results));
    }

    /** Called when a program becomes active; defers to the base class. */
    @Override
    protected void programActivated(Program program) {
        super.programActivated(program);
    }

    /** Called when a program becomes inactive; defers to the base class. */
    @Override
    protected void programDeactivated(Program program) {
        super.programDeactivated(program);
    }
}
// Custom analyzer class
/** Collects per-function metrics for one program into an AnalysisResults. */
class CustomAnalyzer {
    private final Program program;
    private final AnalysisResults results;

    /**
     * @param program the program whose functions will be analyzed
     */
    public CustomAnalyzer(Program program) {
        this.program = program;
        this.results = new AnalysisResults();
    }

    /** Runs the function, string and reference analyses in that order. */
    public void analyze() {
        analyzeFunctions();
        analyzeStrings();
        analyzeReferences();
    }

    /** Records name, entry point, size and a complexity estimate per function. */
    private void analyzeFunctions() {
        FunctionManager functionManager = program.getFunctionManager();
        FunctionIterator functions = functionManager.getFunctions(true);
        while (functions.hasNext()) {
            Function function = functions.next();

            FunctionAnalysis analysis = new FunctionAnalysis();
            analysis.setName(function.getName());
            analysis.setAddress(function.getEntryPoint());
            analysis.setSize(function.getBody().getNumAddresses());
            // Add complexity metrics
            analysis.setComplexity(calculateComplexity(function));
            results.addFunctionAnalysis(analysis);
        }
    }

    /**
     * Simple complexity estimate: one point per ten addresses in the body.
     * The address count is a long, so the quotient is narrowed explicitly
     * and capped at Integer.MAX_VALUE instead of being returned as a long.
     */
    private int calculateComplexity(Function function) {
        long estimate = function.getBody().getNumAddresses() / 10;
        return (int) Math.min(estimate, (long) Integer.MAX_VALUE);
    }

    private void analyzeStrings() {
        // String analysis implementation
    }

    private void analyzeReferences() {
        // Reference analysis implementation
    }

    /** @return the results accumulated so far */
    public AnalysisResults getResults() {
        return results;
    }
}
// Results data structure
/** Container for the findings produced by one analysis run. */
class AnalysisResults {
    // Both lists start empty; only the function list is exposed to callers.
    private List<FunctionAnalysis> functionAnalyses = new ArrayList<>();
    private List<StringAnalysis> stringAnalyses = new ArrayList<>();

    /** Creates an empty result set. */
    public AnalysisResults() {
    }

    /** Appends one function's analysis to the result set. */
    public void addFunctionAnalysis(FunctionAnalysis analysis) {
        this.functionAnalyses.add(analysis);
    }

    /** @return the function analyses in insertion order (the live list, not a copy) */
    public List<FunctionAnalysis> getFunctionAnalyses() {
        return this.functionAnalyses;
    }
}
/** Plain data holder describing one analyzed function. */
class FunctionAnalysis {
    private String name;
    private Address address;
    private long size;
    private int complexity;

    /** @return the function name */
    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    /** @return the address stored for this function */
    public Address getAddress() {
        return address;
    }

    public void setAddress(Address address) {
        this.address = address;
    }

    /** @return the stored size value */
    public long getSize() {
        return size;
    }

    public void setSize(long size) {
        this.size = size;
    }

    /** @return the stored complexity value */
    public int getComplexity() {
        return complexity;
    }

    public void setComplexity(int complexity) {
        this.complexity = complexity;
    }
}
Plugin Configuration and Deployment
# Plugin build and deployment
# "${GHIDRA_INSTALL_DIR:?}" makes a command fail with an error when the
# variable is unset or empty, instead of silently operating on the wrong
# directory. All expansions are quoted so install paths with spaces work.

# 1. Build plugin
cd "${GHIDRA_INSTALL_DIR:?}"
./gradlew buildExtension -PGHIDRA_INSTALL_DIR="${GHIDRA_INSTALL_DIR:?}"

# 2. Install plugin
cp dist/CustomAnalysisPlugin.zip "${GHIDRA_INSTALL_DIR:?}/Extensions/Ghidra/"

# 3. Enable plugin in Ghidra
# File -> Configure -> Configure Plugins -> Check your plugin

# 4. Plugin directory structure
mkdir -p MyCustomPlugin/src/main/java/mypackage
mkdir -p MyCustomPlugin/src/main/resources
mkdir -p MyCustomPlugin/data

# 5. Create extension.properties
# (quoted delimiter: the shell writes the heredoc body verbatim)
cat > MyCustomPlugin/extension.properties << 'EOF'
name=MyCustomPlugin
description=Custom analysis plugin for Ghidra
author=Your Name
createdOn=2025-01-01
version=1.0
EOF

# 6. Create build.gradle
# (quoted delimiter keeps $rootProject literal for Gradle to resolve)
cat > MyCustomPlugin/build.gradle << 'EOF'
apply from: "$rootProject.projectDir/gradle/javaProject.gradle"
apply from: "$rootProject.projectDir/gradle/helpProject.gradle"
apply from: "$rootProject.projectDir/gradle/distributableGhidraModule.gradle"
dependencies {
    api project(':Base')
    api project(':Decompiler')
}
EOF

# 7. Build and package
./gradlew :MyCustomPlugin:buildExtension

# 8. Install extension
unzip dist/MyCustomPlugin.zip -d "${GHIDRA_INSTALL_DIR:?}/Extensions/Ghidra/"
Integration Examples
CI/CD Integration
# GitHub Actions workflow for Ghidra analysis
#
# Imports every file under binaries/ into a headless Ghidra project, exports
# one JSON summary per binary, uploads the summaries as an artifact, and
# fails the job when a function name matches a known-dangerous libc call.
name: Ghidra Binary Analysis

on:
  push:
    paths:
      - 'binaries/**'
  pull_request:
    paths:
      - 'binaries/**'

jobs:
  ghidra-analysis:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup Java
        uses: actions/setup-java@v4
        with:
          java-version: '17'
          distribution: 'temurin'

      - name: Download Ghidra
        run: |
          wget https://github.com/NationalSecurityAgency/ghidra/releases/download/Ghidra_10.4_build/ghidra_10.4_PUBLIC_20230928.zip
          unzip ghidra_10.4_PUBLIC_20230928.zip
          # A plain `export` is lost when this step's shell exits; writing to
          # $GITHUB_ENV makes the variable visible to every later step.
          echo "GHIDRA_INSTALL_DIR=$PWD/ghidra_10.4_PUBLIC" >> "$GITHUB_ENV"

      - name: Install Ghidra plugins
        run: |
          # Install BinExport
          git clone https://github.com/google/binexport.git
          cd binexport
          mkdir build && cd build
          cmake ..
          make -j$(nproc)
          cp BinExport.jar "$GHIDRA_INSTALL_DIR/Extensions/Ghidra/"

      - name: Run Ghidra analysis
        run: |
          # Create analysis script.
          # Ghidra 10.x runs .py scripts under Jython 2.7, so the script must
          # not use Python 3-only syntax such as f-strings.
          cat > analyze_binary.py << 'EOF'
          import json
          import os

          def analyze_program():
              program = getCurrentProgram()
              if not program:
                  return

              # Report the first external entry point, if any was recorded.
              entry_points = program.getSymbolTable().getExternalEntryPointIterator()
              entry_point = str(entry_points.next()) if entry_points.hasNext() else None

              results = {
                  'binary_name': program.getName(),
                  'architecture': str(program.getLanguage().getProcessor()),
                  'entry_point': entry_point,
                  'functions': [],
                  'strings': [],
                  'imports': []
              }

              # Analyze functions
              function_manager = program.getFunctionManager()
              for func in function_manager.getFunctions(True):
                  func_data = {
                      'name': func.getName(),
                      'address': str(func.getEntryPoint()),
                      'size': int(func.getBody().getNumAddresses())
                  }
                  results['functions'].append(func_data)

              # Export results: one file per binary so that analysing several
              # binaries does not overwrite earlier output.
              output_file = os.path.join(
                  os.getcwd(), 'analysis_results_%s.json' % program.getName())
              with open(output_file, 'w') as f:
                  json.dump(results, f, indent=2)
              print("Analysis complete. Results saved to %s" % output_file)

          analyze_program()
          EOF

          # Make sure the project location exists before the first import.
          mkdir -p /tmp/ghidra_projects

          # Run analysis on all binaries
          for binary in binaries/*; do
            if [ -f "$binary" ]; then
              echo "Analyzing $binary"
              # -scriptPath points Ghidra at the directory that holds
              # analyze_binary.py; it is not on the default script path.
              "$GHIDRA_INSTALL_DIR/support/analyzeHeadless" \
                /tmp/ghidra_projects \
                "CI_Analysis_$(basename "$binary")" \
                -import "$binary" \
                -scriptPath "$PWD" \
                -postScript analyze_binary.py \
                -overwrite
            fi
          done

      - name: Upload analysis results
        uses: actions/upload-artifact@v4
        with:
          name: ghidra-analysis-results
          path: analysis_results_*.json

      - name: Security scan results
        run: |
          # Parse results for security issues
          python3 << 'EOF'
          import glob
          import json
          import sys

          result_files = sorted(glob.glob('analysis_results_*.json'))
          if not result_files:
              print("Analysis results not found")
              sys.exit(1)

          # Check for dangerous functions (substring match on the lowercased name)
          dangerous_functions = ['strcpy', 'gets', 'sprintf', 'system']
          security_issues = []
          for path in result_files:
              with open(path, 'r') as f:
                  results = json.load(f)
              for func in results.get('functions', []):
                  func_name = func['name'].lower()
                  for dangerous in dangerous_functions:
                      if dangerous in func_name:
                          security_issues.append({
                              'type': 'dangerous_function',
                              'binary': results.get('binary_name', path),
                              'function': func['name'],
                              'address': func['address'],
                              'issue': f'Potentially dangerous function: {dangerous}'
                          })

          if security_issues:
              print("Security issues found:")
              for issue in security_issues:
                  print(f" - {issue['issue']} in {issue['function']} at {issue['address']} ({issue['binary']})")
              sys.exit(1)

          print("No security issues detected")
          EOF
Docker Integration
# Dockerfile for Ghidra analysis environment
FROM ubuntu:22.04

# Install dependencies
# DEBIAN_FRONTEND is set only for this command so package configuration
# (e.g. tzdata) cannot stop the build waiting for interactive input.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
    openjdk-17-jdk \
    wget \
    unzip \
    git \
    build-essential \
    cmake \
    python3 \
    python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Install Ghidra
WORKDIR /opt
RUN wget https://github.com/NationalSecurityAgency/ghidra/releases/download/Ghidra_10.4_build/ghidra_10.4_PUBLIC_20230928.zip \
    && unzip ghidra_10.4_PUBLIC_20230928.zip \
    && rm ghidra_10.4_PUBLIC_20230928.zip \
    && mv ghidra_10.4_PUBLIC ghidra
ENV GHIDRA_INSTALL_DIR=/opt/ghidra
ENV PATH=$PATH:$GHIDRA_INSTALL_DIR/support

# Install Python dependencies (no pip cache kept in the image layer)
RUN pip3 install --no-cache-dir ghidra-bridge requests

# Install Ghidra plugins
# The clone and build tree are removed in the same layer once the jar has
# been copied, so they do not add to the image size.
WORKDIR /tmp
RUN git clone https://github.com/google/binexport.git \
    && cd binexport \
    && mkdir build && cd build \
    && cmake .. \
    && make -j$(nproc) \
    && cp BinExport.jar $GHIDRA_INSTALL_DIR/Extensions/Ghidra/ \
    && cd /tmp \
    && rm -rf /tmp/binexport

# Create analysis scripts directory
RUN mkdir -p /opt/analysis-scripts

# Copy analysis scripts
COPY scripts/ /opt/analysis-scripts/

# Create workspace
RUN mkdir -p /workspace/projects /workspace/binaries /workspace/results
WORKDIR /workspace

# Entry point script
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
#!/bin/bash
# entrypoint.sh
#
# Container entry point: imports one binary into a headless Ghidra project,
# runs optional post-analysis scripts, and copies the project data to the
# output directory.
#
# Environment:
#   BINARY_PATH       (required) path of the binary to import
#   PROJECT_NAME      Ghidra project name (default: analysis_project)
#   ANALYSIS_SCRIPTS  whitespace-separated script paths, relative to
#                     /opt/analysis-scripts
#   OUTPUT_DIR        directory results are copied to
#                     (default: /workspace/results)
#   KEEP_RUNNING      set to "true" to keep the container alive afterwards
#
# Arguments: when any are given they are executed as a command in place of
# the analysis, so `docker run IMAGE bash` or a compose `command:` works.
set -e

if [ "$#" -gt 0 ]; then
    exec "$@"
fi

# Default values
PROJECT_NAME=${PROJECT_NAME:-"analysis_project"}
BINARY_PATH=${BINARY_PATH:-""}
ANALYSIS_SCRIPTS=${ANALYSIS_SCRIPTS:-""}
OUTPUT_DIR=${OUTPUT_DIR:-"/workspace/results"}

# Create output directory
mkdir -p "$OUTPUT_DIR"

if [ -z "$BINARY_PATH" ]; then
    echo "Error: BINARY_PATH environment variable must be set" >&2
    exit 1
fi
if [ ! -f "$BINARY_PATH" ]; then
    echo "Error: Binary file not found: $BINARY_PATH" >&2
    exit 1
fi

echo "Starting Ghidra analysis..."
echo "Binary: $BINARY_PATH"
echo "Project: $PROJECT_NAME"
echo "Output: $OUTPUT_DIR"

# Build analysis command.
# An array keeps each argument intact; the previous string + eval form
# re-parsed the values, so a space or shell metacharacter in BINARY_PATH or
# PROJECT_NAME would split the argument or be executed.
ANALYSIS_CMD=(
    "$GHIDRA_INSTALL_DIR/support/analyzeHeadless"
    /workspace/projects
    "$PROJECT_NAME"
    -import "$BINARY_PATH"
    -overwrite
)

# Add analysis scripts if specified
if [ -n "$ANALYSIS_SCRIPTS" ]; then
    # Intentionally unquoted: the list is split on whitespace.
    for script in $ANALYSIS_SCRIPTS; do
        if [ -f "/opt/analysis-scripts/$script" ]; then
            ANALYSIS_CMD+=(-postScript "/opt/analysis-scripts/$script")
        else
            echo "Warning: Script not found: $script"
        fi
    done
fi

# Run analysis
"${ANALYSIS_CMD[@]}"

# Copy results
if [ -d "/workspace/projects/$PROJECT_NAME.rep" ]; then
    cp -r "/workspace/projects/$PROJECT_NAME.rep" "$OUTPUT_DIR/"
fi
echo "Analysis complete. Results saved to $OUTPUT_DIR"

# Keep container running if requested
if [ "${KEEP_RUNNING:-}" = "true" ]; then
    echo "Keeping container running..."
    tail -f /dev/null
fi
# Docker usage examples

# Build the image
docker build -t ghidra-analysis .

# Analyze a single binary
docker run --rm \
  -v /path/to/binary:/workspace/binaries/target:ro \
  -v /path/to/results:/workspace/results \
  -e BINARY_PATH=/workspace/binaries/target \
  -e PROJECT_NAME=my_analysis \
  -e ANALYSIS_SCRIPTS="export_functions.py detect_crypto.py" \
  ghidra-analysis

# Interactive analysis
# --entrypoint bash starts a shell directly instead of the analysis entry
# point, so BINARY_PATH does not have to be set. Run /entrypoint.sh by hand
# from the shell (with BINARY_PATH exported) to analyze a binary.
docker run -it \
  -v /path/to/binaries:/workspace/binaries:ro \
  -v /path/to/results:/workspace/results \
  --entrypoint bash \
  ghidra-analysis
# Batch analysis with docker-compose
# - The heredoc delimiter is quoted so the shell writes the file verbatim.
# - "$$" is Compose's escape for a literal "$"; a single "$binary" would be
#   interpolated by Compose (to an empty string) before the container starts.
# - entrypoint is overridden with `bash -c` so the loop is executed as a
#   script rather than handed to the image's ENTRYPOINT as arguments; the
#   loop then calls /entrypoint.sh once per binary.
cat > docker-compose.yml << 'EOF'
version: '3.8'
services:
  ghidra-analysis:
    build: .
    volumes:
      - ./binaries:/workspace/binaries:ro
      - ./results:/workspace/results
      - ./custom-scripts:/opt/analysis-scripts/custom:ro
    environment:
      - PROJECT_NAME=batch_analysis
      - ANALYSIS_SCRIPTS=export_functions.py detect_crypto.py custom/my_script.py
    entrypoint: ["bash", "-c"]
    command:
      - |
        for binary in /workspace/binaries/*; do
          if [ -f "$$binary" ]; then
            echo "Analyzing $$(basename "$$binary")"
            BINARY_PATH="$$binary" \
            PROJECT_NAME="analysis_$$(basename "$$binary")" \
            /entrypoint.sh
          fi
        done
EOF
docker-compose up
Resources and Documentation
Official Resources
- Ghidra GitHub Repository - Source code and official plugins
- Ghidra Documentation - Official documentation and guides
- Ghidra API Documentation - Complete API reference
- Ghidra Plugin Development Guide - Official plugin development tutorial
Community Plugins and Extensions
- Ghidra Plugin Repository - Curated list of plugins
- BinExport - Export to BinDiff and BinNavi
- GhidraBridge - Python bridge for Ghidra
- Ghidra2Frida - Generate Frida hooks
- Ghidra Jupyter - Jupyter notebook integration
Learning Resources
- Ghidra Training Materials - Official training courses
- Ghidra Scripting Tutorial - Scripting guide
- Reverse Engineering with Ghidra - Comprehensive book
- Ghidra Blog Posts - NSA's official blog posts
Development and Contribution
- Ghidra Development Guide - Development setup
- Contributing to Ghidra - Contribution guidelines
- Ghidra Issue Tracker - Bug reports and feature requests
- Ghidra Discussions - Community discussions