Ir al contenido

Goodork

Goodork is a Python-based automated Google dorking tool that crafts and executes advanced Google Search operator queries to discover exposed information, misconfigurations, sensitive data, and potential vulnerabilities on indexed web pages. It automates the reconnaissance process and enables large-scale OSINT investigations.

Installation

# Tools used below: git, python3 and pip3.

# Clone the repository and work from inside it (the examples call goodork.py by relative path)
git clone https://github.com/mssaleh/goodork.git
cd goodork

# Install the Python dependencies listed in requirements.txt
pip3 install -r requirements.txt

# Make the script executable (optional for the examples here, which all invoke it through python3)
chmod +x goodork.py

# Show the built-in help
python3 goodork.py -h

Google Search Operators Reference

| Operator | Purpose | Example |
|----------|---------|---------|
| site: | Restrict to domain | site:example.com admin |
| inurl: | URL contains term | inurl:admin panel |
| intitle: | Page title contains | intitle:admin inurl:login |
| intext: | Page body contains | intext:password |
| filetype: | Specific file type | filetype:pdf confidential |
| "" | Exact phrase match | "api_key=" |
| - | Exclude term | site:example.com -help |
| * | Wildcard character | inurl:admin*panel |
| .. | Range | inurl:2019..2021 |
| OR | Logical OR | site:example.com OR site:api.example.com |
| cache: | View cached version | cache:example.com |
| related: | Related pages | related:example.com |

Command Line Usage

Basic Queries

# Simple search with results limit (-q is the query, --limit caps it at 100 here)
python3 goodork.py -q "site:target.com intitle:admin" --limit 100

# Save results to file (-o names the output file)
python3 goodork.py -q "site:target.com inurl:backup" -o results.txt

# JSON output format (-f selects the format)
python3 goodork.py -q "site:target.com" -f json -o results.json

# CSV format with headers
python3 goodork.py -q "site:target.com" -f csv -o results.csv

# Verbose output with timing
python3 goodork.py -q "site:target.com" -v

Subdomain Discovery

# Find subdomains, excluding results that match "www" (wildcard site: plus the - exclusion operator)
python3 goodork.py -q "site:*.target.com -www"

# Discover all subdomains, with the result limit raised to 500
python3 goodork.py -q "site:*.target.com" --limit 500

# Find CDN subdomains under either naming scheme (cdn.target.com or cdn-target.com)
python3 goodork.py -q "site:*.cdn.target.com OR site:*.cdn-target.com"

# AWS S3 bucket discovery (bucket name in the path, or as a host-name prefix)
# Single-quoted so the inner double quotes reach the tool as an exact-phrase match
python3 goodork.py -q 's3.amazonaws.com/target OR "target.s3.amazonaws.com"'

# Azure storage discovery (account named "target", or prefixed "target-")
python3 goodork.py -q "target.blob.core.windows.net OR target-*.blob.core.windows.net"

Vulnerability & Misconfiguration Hunting

Admin Interface Discovery

# Find admin panels (page title contains "admin")
python3 goodork.py -q "site:target.com intitle:admin"

# WordPress admin (results restricted to the /wp-admin path)
python3 goodork.py -q "site:target.com/wp-admin"

# cPanel login (page title contains "cPanel")
python3 goodork.py -q "site:target.com intitle:cPanel"

# Joomla administrator (results restricted to the /administrator path)
python3 goodork.py -q "site:target.com/administrator"

# Plesk Panel (page title contains "plesk")
# NOTE(review): the query puts port 8443 inside site: — confirm Google honours a port there
python3 goodork.py -q "site:target.com:8443 intitle:plesk"

Exposed Sensitive Data

# Every query in this group is single-quoted so that the inner double quotes
# survive the shell and are passed through as exact-phrase operators.

# Exposed credentials (page body contains "username:" or "password:")
python3 goodork.py -q 'site:target.com intext:"username:" OR intext:"password:"'

# API keys and secrets (three common spellings of the parameter name)
python3 goodork.py -q 'site:target.com "api_key=" OR "apikey=" OR "api-key="'

# AWS credentials
python3 goodork.py -q 'site:target.com "AKIA" OR "aws_secret_access_key"'

# Database connection strings (MySQL, MongoDB and PostgreSQL URL schemes)
python3 goodork.py -q 'site:target.com "mysql://" OR "mongodb://" OR "postgresql://"'

# Private SSH keys (PEM header lines)
python3 goodork.py -q 'site:target.com "BEGIN RSA PRIVATE KEY" OR "BEGIN PRIVATE KEY"'

Exposed Files & Backups

# Each query ORs several filetype: operators so that any of the listed
# extensions on the target domain matches.

# Database backups
python3 goodork.py -q "site:target.com filetype:sql OR filetype:db"

# Configuration files
python3 goodork.py -q "site:target.com filetype:conf OR filetype:config OR filetype:cfg"

# Backup files
python3 goodork.py -q "site:target.com filetype:bak OR filetype:backup OR filetype:old"

# Excel/Spreadsheets (including CSV exports)
python3 goodork.py -q "site:target.com filetype:xlsx OR filetype:xls OR filetype:csv"

# Archives
python3 goodork.py -q "site:target.com filetype:zip OR filetype:rar OR filetype:7z"

# Source code (JavaScript, Python, Java)
python3 goodork.py -q "site:target.com filetype:js OR filetype:py OR filetype:java"

Misconfiguration Discovery

# Directory listing enabled (page title contains "index.of")
python3 goodork.py -q "site:target.com intitle:index.of"

# Exposed .git directory (results restricted to the /.git path)
python3 goodork.py -q "site:target.com/.git filetype:config"

# SVN exposed (results restricted to the /.svn path)
python3 goodork.py -q "site:target.com/.svn"

# TFVC/TFS exposed
# NOTE(review): the only search term is the plain keyword "mapping", which is very broad — confirm the intended indicator
python3 goodork.py -q "site:target.com mapping"

# Exposed Docker files (Dockerfile or docker-compose.yml mentioned on the target domain)
python3 goodork.py -q "site:target.com Dockerfile OR docker-compose.yml"

API & Endpoint Discovery

# API endpoints (URL contains "api/v", i.e. any versioned API path)
python3 goodork.py -q "site:target.com inurl:api/v"

# Swagger/OpenAPI exposed (URL contains "swagger" or "openapi")
python3 goodork.py -q "site:target.com inurl:swagger OR inurl:openapi"

# GraphQL endpoints (URL contains "graphql")
python3 goodork.py -q "site:target.com inurl:graphql"

# RESTful API paths (explicit v1 and v2 prefixes)
python3 goodork.py -q "site:target.com inurl:/api/v1 OR inurl:/api/v2"

# SOAP/XML endpoints (WSDL files, or XML files whose body mentions "soap")
python3 goodork.py -q "site:target.com filetype:wsdl OR filetype:xml intext:soap"

Real-World Reconnaissance Workflow

Complete Target Enumeration

#!/usr/bin/env bash
# Comprehensive reconnaissance on a single domain.
#
# Runs a fixed series of goodork.py queries against TARGET, writes each
# query's results to its own file under a dated directory, then merges those
# files into one de-duplicated list.
#
# Usage:    bash recon.sh [domain]      (domain defaults to target.com)
# Requires: python3, with goodork.py in the current directory.

# -u and pipefail catch typos and broken pipelines. -e is deliberately left
# off: a single failed query must not abort the remaining ones.
set -uo pipefail

TARGET="${1:-target.com}"
OUTPUT_DIR="recon_$(date +%Y%m%d)"
ALL_FINDINGS="$OUTPUT_DIR/all_findings.txt"

mkdir -p "$OUTPUT_DIR" || exit 1

# Per-query result files that exist and should be consolidated, in query order.
result_files=()

#######################################
# Run one dork and remember its result file.
# Globals:   OUTPUT_DIR (read), result_files (appended)
# Arguments: $1 - progress message
#            $2 - Google query
#            $3 - result file name, without directory or .txt suffix
# Outputs:   progress on stdout, failures on stderr
# Returns:   0 always, so the remaining queries still run
#######################################
run_dork() {
  local message=$1 query=$2
  local outfile="$OUTPUT_DIR/$3.txt"

  echo "[*] $message"
  if ! python3 goodork.py -q "$query" -o "$outfile"; then
    echo "[!] Query failed: $query" >&2
  fi
  if [[ -f "$outfile" ]]; then
    result_files+=("$outfile")
  fi
  return 0
}

echo "[*] Starting reconnaissance on $TARGET"

# 1. Subdomain discovery
run_dork "Discovering subdomains..." "site:*.$TARGET" subdomains

# 2. Admin panel search
run_dork "Searching for admin panels..." "site:$TARGET intitle:admin" admin_panels

# 3. Exposed files
run_dork "Finding exposed files..." \
  "site:$TARGET filetype:pdf OR filetype:xlsx OR filetype:doc" exposed_files

# 4. Configuration files
run_dork "Searching for config files..." \
  "site:$TARGET filetype:conf OR filetype:config OR .env" config_files

# 5. API endpoints
run_dork "Discovering APIs..." "site:$TARGET inurl:api" api_endpoints

# 6. Sensitive data
run_dork "Hunting for exposed data..." \
  "site:$TARGET password OR apikey OR token" sensitive_data

# 7. Database backups
run_dork "Searching for backups..." \
  "site:$TARGET filetype:sql OR filetype:backup" backups

# Consolidate results. Only the per-query files are merged: a "*.txt" glob
# would also match all_findings.txt from an earlier run on the same day and
# fold stale entries back into the new list.
echo "[*] Consolidating findings..."
if (( ${#result_files[@]} > 0 )); then
  sort -u -- "${result_files[@]}" > "$ALL_FINDINGS"
else
  : > "$ALL_FINDINGS"
fi

# Arithmetic expansion strips the padding some wc implementations add.
total=$(wc -l < "$ALL_FINDINGS")

echo "[+] Reconnaissance complete"
echo "[+] Results saved to $OUTPUT_DIR"
echo "[+] Total unique findings: $((total))"

Focused Domain Reconnaissance

# Quick vulnerability indicators: one result file per query
python3 goodork.py -q "site:target.com intitle:debug" -o debug.txt
python3 goodork.py -q "site:target.com inurl:admin.php" -o admin_php.txt
python3 goodork.py -q "site:target.com inurl:config.php" -o config_php.txt
python3 goodork.py -q "site:target.com intitle:login" -o login_pages.txt
python3 goodork.py -q "site:target.com intext:©" -o copyright_pages.txt

# Consolidate only the five files written above. A bare *.txt glob would also
# merge unrelated text files from this directory (dork lists, proxy lists,
# earlier results) and findings.txt itself on a re-run. cat is kept in front
# of sort because it carries on past a file that a failed query never created.
cat debug.txt admin_php.txt config_php.txt login_pages.txt copyright_pages.txt \
  | sort -u > findings.txt

Custom Dork Collections

Authentication & Credentials

# Create dorks.txt with one query per line.
# The quoted 'EOF' delimiter turns off shell expansion inside the heredoc, so
# the double quotes and other characters are written to the file verbatim.
cat > dorks.txt << 'EOF'
site:target.com intext:"username:" intext:"password:"
site:target.com "admin" "password"
site:target.com filetype:htpasswd
site:target.com filetype:credentials
site:target.com intext:"user:" intext:"pass:"
site:target.com intitle:login
site:target.com/.env
site:target.com/.htpasswd
EOF

# Run collection (-d passes the dork file; results go to a single output file)
python3 goodork.py -d dorks.txt -o auth_results.txt

Data Exfiltration

# Create data_dorks.txt with one query per line: data-file types plus phrases
# that mark documents as sensitive. The quoted 'EOF' delimiter keeps the
# heredoc literal, so the double quotes are written to the file unchanged.
cat > data_dorks.txt << 'EOF'
site:target.com filetype:csv
site:target.com filetype:sql
site:target.com filetype:xlsx
site:target.com filetype:json intext:"password"
site:target.com "database backup"
site:target.com "customer data"
site:target.com "confidential"
site:target.com "internal use only"
EOF

# Run the collection (-d passes the dork file; results go to a single output file)
python3 goodork.py -d data_dorks.txt -o data_findings.txt

Rate Limiting & Detection Evasion

# Add delay between requests, with the result count capped at 50
# NOTE(review): the unit of --delay is presumably seconds — confirm with `python3 goodork.py -h`
python3 goodork.py -q "site:target.com" --delay 3 --limit 50

# Randomize user-agent
python3 goodork.py -q "site:target.com" --random-user-agent

# Use proxy rotation (proxies are read from proxies.txt)
python3 goodork.py -q "site:target.com" --proxy-file proxies.txt

# Slow mode with logging
python3 goodork.py -q "site:target.com" --slow --verbose

Output Analysis

Parse and Filter Results

# Extract only URLs
# [^[:space:]] is the POSIX way to say "not whitespace". Inside a bracket
# expression \s is not an escape: [^\s] means "neither a backslash nor the
# letter s", which truncates or drops most URLs.
grep -oE 'https?://[^[:space:]]+' results.txt | sort -u > urls_only.txt

# Filter by subdomain (-F matches the host name literally, so "." is not a wildcard)
grep -F "sub.target.com" results.txt > subdomain_results.txt

# Find sensitive patterns (case-insensitive)
grep -iE "password|apikey|token|secret" results.txt > sensitive.txt

# Domain-based filtering: reduce each line to the host that follows "://".
# -n together with the p flag prints only lines that really contain a URL;
# without them, lines with no URL would pass through unchanged into domains.txt.
sed -n 's|.*://\([^/[:space:]]*\).*|\1|p' results.txt | sort -u > domains.txt

# Python processing: print every result URL that contains "api" (any letter case).
# The script is fed to python3 on stdin; the quoted delimiter keeps it literal.
python3 <<'PYTHON'
import json

with open('results.json') as handle:
    records = json.load(handle)

for record in records:
    link = record['url']
    if 'api' in link.lower():
        print(link)
PYTHON

Integration with Other Tools

Feed to Web Scanners

# Extract URLs for Burp Suite: take the "url" field of every element of the
# JSON array with jq, de-duplicate, and save the list as burp_scope.txt
# (relies on -f json without -o printing the results to stdout)
python3 goodork.py -q "site:target.com" -f json | \
  jq -r '.[] | .url' | sort -u > burp_scope.txt

# Nuclei scanning: same URL extraction, piped straight into nuclei
# NOTE(review): "-l -" presumably makes nuclei read its target list from stdin — confirm against the installed nuclei version
python3 goodork.py -q "site:target.com" -f json | \
  jq -r '.[] | .url' | nuclei -l - -t templates/

# OWASP ZAP: write the results to a file first
python3 goodork.py -q "site:target.com" -o urls.txt
# Import urls.txt into ZAP

Best Practices

  • Respect Google’s Terms of Service and robots.txt
  • Implement delays between searches to avoid rate limiting
  • Use legitimate user agents and headers
  • Verify findings manually before reporting
  • Document all search queries used
  • Filter false positives from results
  • Combine with other OSINT tools for validation
  • Obtain proper authorization before conducting reconnaissance
  • Monitor Google Search for changes to operators

References


Last updated: 2026-03-30