Skip to content

Goofile

Goofile is a reconnaissance tool that uses Google dorks to find specific file types hosted on a target domain. It automates the process of searching for potentially sensitive files (PDFs, documents, source code, configs, backups) that may be publicly accessible. Goofile is useful for OSINT (Open Source Intelligence) gathering and authorized penetration testing.

sudo apt-get update
sudo apt-get install python3 python3-pip git
git clone https://github.com/1007/goofile.git
cd goofile
pip3 install -r requirements.txt
pip3 install goofile
python3 goofile.py --help
# or if installed via pip
goofile --help
python3 goofile.py -d <domain> -f <filetype>
| Option | Description | Example |
|---|---|---|
| -d, --domain | Target domain (required) | -d example.com |
| -f, --filetype | File type to search for | -f pdf |
| -l, --limit | Max results to return | -l 50 |
| -t, --timeout | Search timeout in seconds | -t 10 |
| --proxy | Use HTTP proxy | --proxy http://proxy:8080 |
| --user-agent | Custom User-Agent string | --user-agent "Mozilla/5.0..." |
| -o, --output | Save results to file | -o results.txt |
| -v, --verbose | Verbose output | -v |
| --delay | Delay between requests (seconds) | --delay 2 |
# Find all PDFs on a domain
python3 goofile.py -d example.com -f pdf

# Find PDFs with limit of 20 results
python3 goofile.py -d example.com -f pdf -l 20

# Find PDFs and save to file
python3 goofile.py -d example.com -f pdf -o pdfs_found.txt
# Microsoft Word documents
python3 goofile.py -d example.com -f docx

# Excel spreadsheets
python3 goofile.py -d example.com -f xlsx

# PowerPoint presentations
python3 goofile.py -d example.com -f pptx
# Search for JavaScript files
python3 goofile.py -d example.com -f js

# Search for configuration files
python3 goofile.py -d example.com -f conf

# Search for backup files
python3 goofile.py -d example.com -f bak
| File Type | Typical Content |
|---|---|
| pdf | Documents, reports, manuals |
| docx / doc | Word documents, specifications |
| xlsx / xls | Spreadsheets, budgets, data |
| pptx / ppt | Presentations, slides |
| zip / rar | Archives, backups |
| sql | Database dumps |
| txt | Text files, logs, config |
| conf / config | Configuration files |
| bak / backup | Backup files |
| exe | Executable files |
| log | Log files |
| csv | CSV data files |
# Search for multiple file types sequentially.
# Each filetype is quoted when expanded so the command is safe even if a
# list entry ever contains spaces or glob characters (ShellCheck SC2086).
for filetype in pdf docx xlsx txt sql; do
  echo "[*] Searching for $filetype files..."
  python3 goofile.py -d example.com -f "$filetype" -o "results_${filetype}.txt"
done

# Combine results
cat results_*.txt > all_results.txt
# Search root domain
python3 goofile.py -d example.com -f pdf

# Search subdomain
python3 goofile.py -d mail.example.com -f pdf

# Search common subdomains.
# Quote the expansions so the domain and output filename stay single words
# even if a list entry contains special characters (ShellCheck SC2086).
for subdomain in www mail ftp admin dev test; do
  python3 goofile.py -d "${subdomain}.example.com" -f pdf -o "${subdomain}.txt"
done
#!/bin/bash
# Scan multiple domains for PDF and Word documents with Goofile.
# Strict mode: abort on unhandled errors, unset variables, or a failure
# anywhere in a pipeline, instead of silently continuing to the next target.
set -euo pipefail

# Targets must be domains you are authorized to scan.
readonly domains=(
  "target1.com"
  "target2.com"
  "target3.com"
)

for domain in "${domains[@]}"; do
  echo "[*] Scanning $domain"
  python3 goofile.py -d "$domain" -f pdf -o "${domain}_pdfs.txt"
  python3 goofile.py -d "$domain" -f docx -o "${domain}_docs.txt"
done
# Route through proxy server
python3 goofile.py \
  -d example.com \
  -f pdf \
  --proxy http://proxy.company.com:8080

# With authentication
python3 goofile.py \
  -d example.com \
  -f pdf \
  --proxy http://user:pass@proxy.com:8080
# Some versions support SOCKS
python3 goofile.py \
  -d example.com \
  -f pdf \
  --proxy socks5://127.0.0.1:9050
# Use custom User-Agent to avoid detection
python3 goofile.py \
  -d example.com \
  -f pdf \
  --user-agent "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
# Add delays to be less aggressive
python3 goofile.py \
  -d example.com \
  -f pdf \
  --delay 2 \
  -l 100
# Save to text file
python3 goofile.py -d example.com -f pdf -o results.txt

# View results
cat results.txt

# Count results
wc -l results.txt
# Extract domain names
python3 goofile.py -d example.com -f pdf | cut -d'/' -f3 | sort -u

# Filter for specific pattern
python3 goofile.py -d example.com -f pdf | grep -i "confidential"

# Download found files (with caution)
python3 goofile.py -d example.com -f pdf | xargs -I {} wget {}
#!/usr/bin/env python3
import subprocess
import json
from urllib.parse import urlparse

def search_and_process(domain, filetype):
    """Run goofile.py against *domain* for *filetype* and collect result URLs.

    Invokes ``python3 goofile.py -d <domain> -f <filetype> -v`` as a
    subprocess and keeps every stdout line that starts with 'http'.

    Returns:
        list[str]: the URL lines found; empty if the subprocess exits
        non-zero (the error is printed, not raised).
    """
    command = [
        'python3', 'goofile.py',
        '-d', domain,
        '-f', filetype,
        '-v',
    ]

    found = []
    try:
        raw = subprocess.check_output(command, text=True)
    except subprocess.CalledProcessError as exc:
        print(f"Error: {exc}")
    else:
        found = [line for line in raw.strip().split('\n')
                 if line.startswith('http')]

    return found

def analyze_results(results):
    """Summarize a list of URL strings.

    Args:
        results: iterable of URL strings.

    Returns:
        dict with 'total' (count of URLs), 'domains' (set of distinct
        hostnames) and 'paths' (set of distinct URL paths).
    """
    summary = {'total': len(results), 'domains': set(), 'paths': set()}

    for item in results:
        pieces = urlparse(item)
        summary['domains'].add(pieces.netloc)
        summary['paths'].add(pieces.path)

    return summary

# Usage
# NOTE(review): search_and_process shells out to goofile.py, so this example
# only works when run from the goofile checkout directory — confirm.
files = search_and_process('example.com', 'pdf')
print(f"Found {len(files)} PDF files")

# Summarize distinct hosts and paths among the discovered URLs.
analysis = analyze_results(files)
print(f"Unique domains: {len(analysis['domains'])}")
print(f"Unique paths: {len(analysis['paths'])}")
# Start with basic domain information
nslookup example.com
whois example.com

# Check subdomains
python3 goofile.py -d example.com -f pdf
python3 goofile.py -d www.example.com -f pdf
# Common sensitive file types in order of interest
filetypes=(
  "pdf"
  "docx"
  "xlsx"
  "sql"
  "backup"
  "conf"
  "log"
)

for ftype in "${filetypes[@]}"; do
  echo "[*] Searching for $ftype..."
  python3 goofile.py -d example.com -f $ftype -l 50 >> findings.txt
done
# Compile and deduplicate results
cat findings.txt | sort -u > unique_findings.txt

# Group by file type
grep "\.pdf$" unique_findings.txt > pdfs.txt
grep "\.docx$" unique_findings.txt > docs.txt
grep "\.xlsx$" unique_findings.txt > sheets.txt

# Count by type
echo "PDFs: $(wc -l < pdfs.txt)"
echo "Docs: $(wc -l < docs.txt)"
echo "Sheets: $(wc -l < sheets.txt)"

Goofile automates the following Google dork searches:

# PDF files on domain
site:example.com filetype:pdf

# Word documents
site:example.com filetype:docx

# Excel spreadsheets
site:example.com filetype:xlsx

# Log files
site:example.com filetype:log

# Config files
site:example.com filetype:conf

# Backup files
site:example.com filetype:bak

# Combined search
site:example.com (filetype:pdf OR filetype:docx OR filetype:xlsx)
# Slower scan with delays
python3 goofile.py \
  -d example.com \
  -f pdf \
  --delay 3 \
  --timeout 15 \
  -l 50
# Split searches across time
# Each search runs as a background job (&); the sleeps stagger the start
# times by 60 s so the queries reach Google spread out rather than in a burst.
python3 goofile.py -d example.com -f pdf -l 20 &
sleep 60
python3 goofile.py -d example.com -f docx -l 20 &
sleep 60
python3 goofile.py -d example.com -f xlsx -l 20 &

# Wait for all to complete
# 'wait' with no arguments blocks until every background job has exited.
wait
| Problem | Solution |
|---|---|
| No results found | Check domain spelling, try different file types |
| Connection timeout | Increase timeout: --timeout 20 |
| "403 Forbidden" | Google blocking requests, use proxy or reduce limit |
| No module found | Install deps: pip3 install -r requirements.txt |
| Slow results | Results depend on Google indexing, may take time |
# Enable verbose output
python3 goofile.py -d example.com -f pdf -v

# Check Python version
python3 --version

# Verify internet connectivity
ping -c 1 google.com

# Test with simpler domain
python3 goofile.py -d google.com -f pdf
  • Only search domains you own or have written authorization to scan
  • Respect robots.txt and site terms of service
  • Use appropriate delays to avoid overloading servers
  • Do not download files without authorization
  • Document all findings and report responsibly
  • Files found through Goofile may contain sensitive information
  • Handle discovered data responsibly
  • Notify system administrators of findings
  • Follow responsible disclosure practices
# Combine with other reconnaissance
# 1. Enumerate domains
# 2. Run Goofile on each domain
# 3. Combine results with other tools

# Example: Nmap + Goofile workflow
nmap -sV example.com > nmap_results.txt
python3 goofile.py -d example.com -f pdf > goofile_results.txt
#!/bin/bash
# Complete reconnaissance script: DNS lookup plus parallel Goofile
# searches for several sensitive file types on one target domain.
# Usage: ./recon.sh <target-domain>
set -euo pipefail

# Fail fast with a usage message if no target was supplied.
TARGET=${1:?usage: $0 <target-domain>}

echo "[*] Starting reconnaissance on $TARGET"

# The original redirected into recon/ without creating it; make sure the
# output directory exists before any redirection.
mkdir -p recon

# DNS enumeration
nslookup "$TARGET" > "recon/$TARGET.dns"

# Goofile search — one background job per file type so they run in parallel.
echo "[*] Running Goofile..."
python3 goofile.py -d "$TARGET" -f pdf  > "recon/$TARGET.pdfs" &
python3 goofile.py -d "$TARGET" -f docx > "recon/$TARGET.docs" &
python3 goofile.py -d "$TARGET" -f xlsx > "recon/$TARGET.sheets" &
python3 goofile.py -d "$TARGET" -f sql  > "recon/$TARGET.sql" &
python3 goofile.py -d "$TARGET" -f bak  > "recon/$TARGET.bak" &

# Wait for all background searches to finish before combining.
wait

# Combine results
cat "recon/${TARGET}".* > "recon/$TARGET.combined.txt"

echo "[+] Reconnaissance complete. Results in recon/ directory"
| Tool | Purpose |
|---|---|
| Metagoofil | Similar Google dorks tool (older) |
| Google Dorking | Manual search using Google |
| Censys | Internet-wide database search |
| Shodan | IoT device search engine |
| OSINT Framework | Comprehensive OSINT toolkit |