Waybackurls
Waybackurls fetches all URLs of a domain from the Wayback Machine (archive.org). Useful for discovering endpoints, API paths, hidden parameters, and historical changes.
Installation
From GitHub Releases
# Linux
# NOTE(review): verify the asset name against the project's Releases page —
# some releases ship versioned tarballs rather than a bare binary. TODO confirm.
wget https://github.com/tomnomnom/waybackurls/releases/latest/download/waybackurls-linux-amd64
chmod +x waybackurls-linux-amd64
sudo mv waybackurls-linux-amd64 /usr/local/bin/waybackurls
# macOS
# (Apple Silicon users may need a darwin-arm64 asset if one is published)
wget https://github.com/tomnomnom/waybackurls/releases/latest/download/waybackurls-darwin-amd64
chmod +x waybackurls-darwin-amd64
sudo mv waybackurls-darwin-amd64 /usr/local/bin/waybackurls
# Go Install
# Requires a Go toolchain; installs to $GOPATH/bin (or ~/go/bin) — ensure that is on PATH
go install github.com/tomnomnom/waybackurls@latest
Verify Installation
# Print usage/flags — confirms the binary is on PATH
waybackurls --help
# Smoke test: should stream archived URLs for the domain read from stdin
echo "example.com" | waybackurls
Basic Usage
| Command | Description |
|---|---|
| `echo "example.com" \| waybackurls` | Fetch all URLs |
| `cat domains.txt \| waybackurls` | Multiple domains |
| `waybackurls example.com` | Direct argument |
| `waybackurls -dates example.com` | Include capture dates |
| `waybackurls -no-subs example.com` | Exclude subdomains |
| `waybackurls example.com \| wc -l` | Count URLs |
URL Enumeration
Basic URL Discovery
# Fetch every archived URL (the domain can also be passed as an argument)
waybackurls example.com
# Attach the capture timestamp to each URL for context
waybackurls -dates example.com
# Persist the results for later analysis
waybackurls example.com > urls.txt
# How many URLs were discovered?
waybackurls example.com | wc -l
# Collapse duplicates
waybackurls example.com | sort -u
# Restrict output to the apex domain (no subdomains)
waybackurls -no-subs example.com
Multiple Domains
# Process a whole domain list in one pass (one domain per line on stdin)
waybackurls < domains.txt > all_urls.txt
# Same, including capture dates
waybackurls -dates < domains.txt > urls_with_dates.txt
# One output file per domain; expansions are quoted so unusual
# domain strings cannot word-split or glob
while IFS= read -r domain; do
  echo "[*] Processing $domain..."
  waybackurls "$domain" > "urls_${domain}.txt"
  echo "    Found: $(wc -l < "urls_${domain}.txt") URLs"
done < domains.txt
Advanced Techniques
OSINT & Reconnaissance
Endpoint Discovery
#!/bin/bash
# Mine Wayback history for interesting endpoints on a single target.
TARGET="example.com"

echo "[*] Discovering historical URLs..."
waybackurls "$TARGET" > all_urls.txt

# API surface
echo ""
echo "=== API Endpoints ==="
grep -iE "(api|rest|graphql|service)" all_urls.txt | sort -u

# Administrative interfaces
echo ""
echo "=== Admin Paths ==="
grep -iE "(admin|dashboard|control|panel|manage)" all_urls.txt | sort -u

# URLs carrying query strings — prime fuzzing candidates
echo ""
echo "=== URLs with Parameters ==="
grep "?" all_urls.txt | head -20

# File-extension histogram; strip query strings first so ".php?x=1"
# still counts as .php, and bound the extension length to 2-5 chars
echo ""
echo "=== By File Type ==="
sed 's/?.*$//' all_urls.txt | grep -oE "\.[a-z0-9]{2,5}$" | sort | uniq -c | sort -rn

# Keyword sweep for potentially sensitive material
echo ""
echo "=== Potentially Sensitive ==="
grep -iE "(backup|config|secret|key|token|password|private)" all_urls.txt
Parameter Discovery
#!/bin/bash
# Extract and analyze query-string parameters from archived URLs.
TARGET="example.com"

# Keep only URLs that carry a query string
waybackurls "$TARGET" | grep "?" > params_urls.txt

# Unique parameter names. '\K' discards the [?&] prefix before the match
# and the lookahead '(?==)' requires a following '=', so we dedupe on the
# bare name (the old pipeline deduped before stripping, letting the same
# name appear once via '?' and again via '&').
echo "=== All Parameters Found ==="
grep -oP '[?&]\K[^=&]+(?==)' params_urls.txt | sort -u

# Frequency-ranked parameter names (the original '(?=)' was an empty,
# always-true lookahead — a no-op)
echo ""
echo "=== Most Common Parameters ==="
grep -oP '[?&]\K[^=&]+(?==)' params_urls.txt | sort | uniq -c | sort -rn | head -20

# Numeric values — candidate IDs for IDOR testing
echo ""
echo "=== ID/Numeric Parameters ==="
grep -oP '=[0-9]+' params_urls.txt | sort -u

# URLs carrying two or more parameters
echo ""
echo "=== Complex URLs ==="
grep -E '\?.*&' params_urls.txt | head -10
Historical Changes Tracking
#!/bin/bash
# Track how a site's archived URL inventory changed over time.
# NOTE(review): `waybackurls -dates` appears to print "<timestamp> <url>",
# i.e. the date is field 1 and the URL is field 2 — confirm against your
# version before relying on the cut fields below.
TARGET="example.com"

waybackurls -dates "$TARGET" > urls_dated.txt

# Anchor on line start so a "2023" inside a URL path cannot match, and
# cut field 2 (the URL — field 1 is the date) before counting
echo "=== 2023 URLs ==="
grep "^2023" urls_dated.txt | cut -d' ' -f2 | sort -u | wc -l

echo ""
echo "=== 2024 URLs ==="
grep "^2024" urls_dated.txt | cut -d' ' -f2 | sort -u | wc -l

# URLs whose captures all predate the current year — candidates for
# endpoints that have since been removed
echo ""
echo "=== Potentially Removed (no captures this year) ==="
grep -v "^$(date +%Y)" urls_dated.txt | grep -E "^202[0-3]" | cut -d' ' -f2 | sort -u

# URLs captured this month; timestamps look RFC3339-style ("YYYY-MM-..."),
# so match on a dashed YYYY-MM prefix (plain %Y%m would never match)
echo ""
echo "=== Recently Added ==="
RECENT=$(date +%Y-%m)
grep "^$RECENT" urls_dated.txt | cut -d' ' -f2 | sort -u
Vulnerability Indicators
#!/bin/bash
# Sweep archived URLs for patterns that often indicate weak spots.
TARGET="example.com"

waybackurls "$TARGET" > all_urls.txt

# SQL-related paths
echo "=== SQL-related Paths ==="
grep -iE "(sql|database|db)" all_urls.txt

# Debug/test environments sometimes left reachable in production
echo ""
echo "=== Debug/Test Paths ==="
grep -iE "(debug|test|staging|sandbox)" all_urls.txt

# API versioning — older versions may be deprecated but still live
echo ""
echo "=== API Versions ==="
grep -iE "api/(v[0-9]|v[0-9]\.[0-9])" all_urls.txt | sort -u

# Backup/export endpoints
echo ""
echo "=== Export/Backup Endpoints ==="
grep -iE "(export|backup|download|archive)" all_urls.txt

# Paths that advertise themselves as unauthenticated
echo ""
echo "=== Unprotected Paths (no auth) ==="
grep -iE "(public|guest|anon)" all_urls.txt

# High-value keyword sweep
echo ""
echo "=== High-value Targets ==="
grep -iE "(config|settings|admin|secret|key|token|account)" all_urls.txt
Integration with Other Tools
Chain with Grep for Further Analysis
#!/bin/bash
# Deeper analysis of parameterized URLs.
TARGET="example.com"

waybackurls "$TARGET" | grep "?" > param_urls.txt

# Parameter names outside the usual suspects
echo "=== Unusual Parameters ==="
grep -oP '[?&]\K[^=&]+' param_urls.txt | grep -v -E "^(id|page|sort|order|search|q|filter|limit)$" | sort | uniq -c | sort -rn | head -20

# Numeric IDs in query strings or path segments — IDOR candidates
echo ""
echo "=== Potential IDOR Endpoints ==="
grep -E "[?&]id=[0-9]+|/[0-9]+(/|$)" param_urls.txt | sort -u

# Anything touching users or accounts
echo ""
echo "=== Account/User Endpoints ==="
grep -iE "(user|account|profile|member)" param_urls.txt | sort -u
Chain with Curl for Content Analysis
#!/bin/bash
# Probe the first 100 historical URLs to see which still respond.
TARGET="example.com"
URLS="urls.txt"

waybackurls -no-subs "$TARGET" | head -100 > "$URLS"

echo "Checking endpoint status..."
# waybackurls already emits absolute URLs (scheme included), so pass them
# to curl as-is — prefixing "https://" again produced "https://http://…"
while IFS= read -r url; do
  status=$(timeout 2 curl -s -o /dev/null -w "%{http_code}" "$url" 2>/dev/null)
  if [ "$status" = "200" ]; then
    echo "$url: $status"
  fi
done < "$URLS" | tee active_endpoints.txt
Subdomain Discovery via Wayback
#!/bin/bash
# Extract subdomains observed in archived URLs.
TARGET="example.com"
# Escape regex metacharacters in the domain — a bare '.' would match any
# character, so "examplexcom" hosts would slip through
ESCAPED=$(printf '%s' "$TARGET" | sed 's/[.[\*^$]/\\&/g')
waybackurls "$TARGET" | \
  grep -oE "https?://[^/]*\.$ESCAPED" | \
  sed "s|https://||; s|http://||" | \
  sort -u > subdomains_from_wayback.txt
echo "Subdomains found: $(wc -l < subdomains_from_wayback.txt)"
Filtering & Analysis
Extract Specific Information
#!/bin/bash
# Parse a saved URL list for structured slices of information.
URLS="urls.txt"

# Paths only (scheme and host stripped); quote "$URLS" throughout so a
# filename with spaces cannot word-split
echo "=== All Paths ==="
sed "s|.*://[^/]*||" "$URLS" | sort -u | head -20

# API paths
echo ""
echo "=== API Paths ==="
grep -oE "/api/.*" "$URLS" | sort -u

# Authentication-related pages
echo ""
echo "=== Auth Pages ==="
grep -iE "(login|auth|signin|signup|register)" "$URLS"

# Dynamic pages taking query strings (classic form handlers)
echo ""
echo "=== Form Endpoints ==="
grep -iE "\.php\?|\.asp\?|\.cfm\?" "$URLS"

# File-delivery endpoints
echo ""
echo "=== Download Endpoints ==="
grep -iE "(download|file|export|csv|pdf)" "$URLS"
Deduplication
# Remove exact duplicates (sort reads the file directly — no need for cat)
sort -u urls.txt > unique_urls.txt
# Deduplicate by path only (strip scheme and host first)
sed "s|.*://[^/]*||" urls.txt | sort -u
# Case-insensitive deduplication
tr '[:upper:]' '[:lower:]' < urls.txt | sort -u
# Collapse query-string variations of the same path
sed "s/?.*$//" urls.txt | sort -u
Comparison with Current State
#!/bin/bash
# Compare Wayback's historical URL set against what is live today.
DOMAIN="example.com"

echo "[*] Fetching historical URLs..."
# sort -u, not plain sort — duplicates would inflate the historical count
waybackurls -no-subs "$DOMAIN" | sort -u > historical.txt

echo "[*] Checking current endpoints..."
# URLs are already absolute (scheme included): request them as-is and
# keep only each response's status line
while IFS= read -r url; do
  curl -s -I "$url" 2>/dev/null | head -1
done < historical.txt | grep -c " 200" > current_count.txt

echo "Historical endpoints: $(wc -l < historical.txt)"
echo "Still accessible: $(cat current_count.txt)"
Data Export & Reporting
Generate Reports
#!/bin/bash
# Build a summary report from a single Wayback fetch.
DOMAIN="example.com"
REPORT="wayback_report_${DOMAIN}_$(date +%Y%m%d).txt"

# Fetch once and reuse — the old version ran waybackurls three times,
# which is slow, hammers archive.org, and can return inconsistent counts.
ALL=$(mktemp)
DATED_FILE=$(mktemp)
trap 'rm -f "$ALL" "$DATED_FILE"' EXIT
waybackurls "$DOMAIN" > "$ALL"
waybackurls -dates "$DOMAIN" > "$DATED_FILE"

cat > "$REPORT" << EOF
=== WAYBACK MACHINE ENUMERATION REPORT ===
Domain: $DOMAIN
Date: $(date)
Tool: waybackurls
=== STATISTICS ===
Total URLs (with duplicates): $(wc -l < "$ALL")
Unique URLs: $(sort -u "$ALL" | wc -l)
Dated entries: $(wc -l < "$DATED_FILE")
=== TOP 20 URLS ===
EOF
sort -u "$ALL" | head -20 >> "$REPORT"
echo "[+] Report saved to $REPORT"
CSV Export
#!/bin/bash
# Export dated captures to CSV, one row per entry.
DOMAIN="example.com"
OUTPUT="wayback_$(date +%Y%m%d).csv"

# Header names what is actually written: each entry's capture date plus
# the date this export ran (the old "first_seen,last_seen" header promised
# columns the script never computed).
echo "path,capture_date,exported_on" > "$OUTPUT"
TODAY=$(date +%Y-%m-%d)   # hoisted out of the loop: no date fork per line
waybackurls -dates "$DOMAIN" | while read -r capture url; do
  # Strip scheme and host, keeping only the path
  path=$(echo "$url" | sed "s|.*://[^/]*||")
  echo "$path,$capture,$TODAY" >> "$OUTPUT"
done
echo "[+] Exported to $OUTPUT"
Best Practices
- Run regularly to catch new historical data
- Combine with current site reconnaissance
- Look for deprecated endpoints that may still work
- Search for parameter patterns that might expose vulnerabilities
- Check for sensitive file exposures (backups, configs)
- Analyze date ranges for when changes occurred
- Look for API version patterns
- Track removed endpoints (may indicate hidden functionality)
Common Issues
No Results
# The domain may simply have no Wayback archives
echo "example.com" | waybackurls
# Add dates to see whether anything at all comes back
echo "example.com" | waybackurls -dates
# Query the Wayback Availability API directly to confirm archives exist
curl -s "https://archive.org/wayback/available?url=example.com"
Too Many Results
# Narrow to a specific path prefix before paging
waybackurls example.com | grep "/api/" | head -100
# Collapse to unique paths (drop scheme/host, then deduplicate)
waybackurls example.com | sed "s|.*://[^/]*||" | sort -u | head -100
# Take a random sample instead of only the first N results
waybackurls example.com | sort -u | shuf | head -100
Resources
Last updated: 2026-03-30