Skip to content

Waybackurls

Waybackurls fetches all URLs of a domain from the Wayback Machine (archive.org). Useful for discovering endpoints, API paths, hidden parameters, and historical changes.

Installation

From GitHub Releases

# Linux (prebuilt amd64 binary from GitHub Releases)
wget https://github.com/tomnomnom/waybackurls/releases/latest/download/waybackurls-linux-amd64
chmod +x waybackurls-linux-amd64
sudo mv waybackurls-linux-amd64 /usr/local/bin/waybackurls

# macOS (amd64 build; Apple Silicon users may need a darwin-arm64 build — verify on the releases page)
wget https://github.com/tomnomnom/waybackurls/releases/latest/download/waybackurls-darwin-amd64
chmod +x waybackurls-darwin-amd64
sudo mv waybackurls-darwin-amd64 /usr/local/bin/waybackurls

# Go Install (requires a Go toolchain; installs to $GOPATH/bin or ~/go/bin)
go install github.com/tomnomnom/waybackurls@latest

Verify Installation

waybackurls --help                 # print usage and available flags
echo "example.com" | waybackurls   # quick smoke test (requires network access)

Basic Usage

Command — Description
echo "example.com" | waybackurls — Fetch all URLs
cat domains.txt | waybackurls — Multiple domains
waybackurls example.com — Direct argument
waybackurls -dates example.com — Include capture dates
waybackurls -no-subs example.com — Exclude subdomains
waybackurls example.com | wc -l — Count URLs

URL Enumeration

Basic URL Discovery

# Fetch all archived URLs
printf '%s\n' "example.com" | waybackurls

# With dates for context
printf '%s\n' "example.com" | waybackurls -dates

# Save to file
printf '%s\n' "example.com" | waybackurls > urls.txt

# Count discovered URLs
printf '%s\n' "example.com" | waybackurls | wc -l

# Get unique URLs
printf '%s\n' "example.com" | waybackurls | sort -u

# Exclude subdomains (root domain only)
printf '%s\n' "example.com" | waybackurls -no-subs

Multiple Domains

# Process domain list (feed the file directly; no cat needed)
waybackurls < domains.txt > all_urls.txt

# With date information
waybackurls -dates < domains.txt > urls_with_dates.txt

# For each domain separately.
# Quote every expansion: an unquoted $domain is word-split and glob-expanded,
# which corrupts both the pipeline input and the output filename.
while IFS= read -r domain; do
  echo "[*] Processing $domain..."
  printf '%s\n' "$domain" | waybackurls > "urls_${domain}.txt"
  echo "  Found: $(wc -l < "urls_${domain}.txt") URLs"
done < domains.txt

Advanced Techniques

OSINT & Reconnaissance

Endpoint Discovery

#!/bin/bash
# Discover API endpoints and interesting paths from Wayback Machine archives.
# Note: no `set -e` here on purpose — grep exits non-zero when a section
# finds nothing, which should not abort the whole report.

TARGET="example.com"

echo "[*] Discovering historical URLs..."
waybackurls "$TARGET" > all_urls.txt

# Filter API endpoints
echo ""
echo "=== API Endpoints ==="
grep -iE "(api|rest|graphql|service)" all_urls.txt | sort -u

# Filter admin pages
echo ""
echo "=== Admin Paths ==="
grep -iE "(admin|dashboard|control|panel|manage)" all_urls.txt | sort -u

# Filter parameter usage (-F: match the literal '?')
echo ""
echo "=== URLs with Parameters ==="
grep -F "?" all_urls.txt | head -20

# Identify file types. Strip query strings/fragments first so that
# ".php?id=1" still counts as ".php" (the $-anchored pattern would
# otherwise never match URLs that carry parameters).
echo ""
echo "=== By File Type ==="
sed 's/[?#].*$//' all_urls.txt | grep -oE "\.[a-z0-9]{2,5}$" | sort | uniq -c | sort -rn

# Find potentially sensitive paths
echo ""
echo "=== Potentially Sensitive ==="
grep -iE "(backup|config|secret|key|token|password|private)" all_urls.txt | sort -u

Parameter Discovery

#!/bin/bash
# Extract and analyze query-string parameters from archived URLs.

TARGET="example.com"

# Get all URLs with parameters ('?' is a literal in basic grep regex)
waybackurls "$TARGET" | grep "?" > params_urls.txt

# Extract parameter names
echo "=== All Parameters Found ==="
grep -oP '[?&][^=&]+=' params_urls.txt | sort -u | sed 's/^[?&]//' | sed 's/=$//'

# Find common parameters.
# \K drops the leading '?'/'&' from the match; '(?==)' requires a
# following '=' so only real key=value names count. The original
# '(?=)' was an EMPTY lookahead — a no-op that also matched bare
# path fragments after '?'/'&'.
echo ""
echo "=== Most Common Parameters ==="
grep -oP '[?&]\K[^=&]+(?==)' params_urls.txt | sort | uniq -c | sort -rn | head -20

# Find ID parameters
echo ""
echo "=== ID/Numeric Parameters ==="
grep -oP '=[0-9]+' params_urls.txt | sort -u

# Find endpoints with multiple parameters
echo ""
echo "=== Complex URLs ==="
grep -E '\?.*&' params_urls.txt | head -10

Historical Changes Tracking

#!/bin/bash
# Track how a site's archived URL set changed over time.
# `waybackurls -dates` prints "TIMESTAMP URL" (date first), so the URL is
# field 2 — the original `cut -f1` was counting unique *timestamps*, not URLs.

TARGET="example.com"

# Get URLs with dates
waybackurls -dates "$TARGET" > urls_dated.txt

# Extract by date ranges. Anchor the year to the line start so a "2023"
# appearing inside the URL itself is not mistaken for a capture date.
echo "=== 2023 URLs ==="
grep '^2023' urls_dated.txt | cut -d' ' -f2 | sort -u | wc -l

echo ""
echo "=== 2024 URLs ==="
grep '^2024' urls_dated.txt | cut -d' ' -f2 | sort -u | wc -l

# Find removed endpoints: captured in 2020-2023 but not in the current year.
# (The original computed an unused CUTOFF date; dropped.)
echo ""
echo "=== Potentially Removed ==="
grep -v "^$(date +%Y)" urls_dated.txt | grep -E '^202[0-3]' | cut -d' ' -f2 | sort -u

# Find URLs first captured this month. The timestamp is dash-separated,
# so use %Y-%m — the original %Y%m ("202401") could never match "2024-01-…".
echo ""
echo "=== Recently Added ==="
RECENT=$(date +%Y-%m)
grep "^$RECENT" urls_dated.txt | cut -d' ' -f2 | sort -u

Vulnerability Indicators

#!/bin/bash
# Flag archived URLs whose paths often indicate vulnerable functionality.

TARGET="example.com"

waybackurls "$TARGET" > all_urls.txt

# SQL-related paths
echo "=== SQL-related Paths ==="
grep -iE "(sql|database|db)" all_urls.txt | sort -u

# Debug/test paths
echo ""
echo "=== Debug/Test Paths ==="
grep -iE "(debug|test|staging|sandbox)" all_urls.txt | sort -u

# API versioning (may indicate deprecated endpoints)
echo ""
echo "=== API Versions ==="
grep -iE "api/(v[0-9]|v[0-9]\.[0-9])" all_urls.txt | sort -u

# Backup/export endpoints
echo ""
echo "=== Export/Backup Endpoints ==="
grep -iE "(export|backup|download|archive)" all_urls.txt | sort -u

# Paths that advertise themselves as unauthenticated
echo ""
echo "=== Unprotected Paths (no auth) ==="
grep -iE "(public|guest|anon)" all_urls.txt | sort -u

# Common vulnerability targets
echo ""
echo "=== High-value Targets ==="
grep -iE "(config|settings|admin|secret|key|token|account)" all_urls.txt | sort -u

Integration with Other Tools

Chain with Grep for Further Analysis

#!/bin/bash
# Deep parameter analysis on archived URLs.

TARGET="example.com"

# Quote $TARGET — an unquoted expansion is word-split and glob-expanded.
waybackurls "$TARGET" | grep "?" > param_urls.txt

# Find URLs with custom parameters (non-standard names)
echo "=== Unusual Parameters ==="
grep -oP '[?&]\K[^=&]+' param_urls.txt | grep -v -E "^(id|page|sort|order|search|q|filter|limit)$" | sort | uniq -c | sort -rn | head -20

# Find numeric ID endpoints (IDOR potential)
echo ""
echo "=== Potential IDOR Endpoints ==="
grep -E "[?&]id=[0-9]+|/[0-9]+(/|$)" param_urls.txt | sort -u

# Find user/account related endpoints
echo ""
echo "=== Account/User Endpoints ==="
grep -iE "(user|account|profile|member)" param_urls.txt | sort -u

Chain with Curl for Content Analysis

#!/bin/bash
# Check whether historically-archived endpoints still respond.

TARGET="example.com"
URLS="urls.txt"

waybackurls -no-subs "$TARGET" | head -100 > "$URLS"

echo "Checking endpoint status..."
# waybackurls already emits complete URLs (scheme included), so request
# "$url" as-is. The original prefixed "https://" again, producing
# "https://http://example.com/..." and breaking every request.
while IFS= read -r url; do
  status=$(timeout 2 curl -s -o /dev/null -w "%{http_code}" "$url" 2>/dev/null)
  if [ "$status" = "200" ]; then
    echo "$url: $status"
  fi
done < "$URLS" | tee active_endpoints.txt

Subdomain Discovery via Wayback

#!/bin/bash
# Extract subdomains of the target from archived URLs.

TARGET="example.com"
# Escape the dots so they match literally in the regex below — with
# plain "$TARGET", '.' matches ANY character (e.g. "exampleXcom").
TARGET_RE=${TARGET//./\\.}

waybackurls "$TARGET" | \
  grep -oE "https?://[^/]*\.$TARGET_RE" | \
  sed "s|https://||; s|http://||" | \
  sort -u > subdomains_from_wayback.txt

echo "Subdomains found: $(wc -l < subdomains_from_wayback.txt)"

Filtering & Analysis

Extract Specific Information

#!/bin/bash
# Parse collected URLs for structured data.

URLS="urls.txt"

# Quote "$URLS" everywhere — an unquoted filename variable is word-split
# and glob-expanded by the shell.

# Extract all paths (strip scheme and host)
echo "=== All Paths ==="
sed "s|.*://[^/]*||" "$URLS" | sort -u | head -20

# Extract only API paths
echo ""
echo "=== API Paths ==="
grep -oE "/api/.*" "$URLS" | sort -u

# Extract login/auth pages
echo ""
echo "=== Auth Pages ==="
grep -iE "(login|auth|signin|signup|register)" "$URLS"

# Extract form submission endpoints
echo ""
echo "=== Form Endpoints ==="
grep -iE "\.php\?|\.asp\?|\.cfm\?" "$URLS"

# Find download endpoints
echo ""
echo "=== Download Endpoints ==="
grep -iE "(download|file|export|csv|pdf)" "$URLS"

Deduplication

# Remove duplicates (sort reads files directly; no cat needed)
sort -u urls.txt > unique_urls.txt

# Remove by path only (ignore domain)
sed "s|.*://[^/]*||" urls.txt | sort -u

# Case-insensitive deduplication
tr '[:upper:]' '[:lower:]' < urls.txt | sort -u

# Remove query string variations
sed "s/?.*$//" urls.txt | sort -u

Comparison with Current State

#!/bin/bash
# Compare Wayback URLs with the current site.

DOMAIN="example.com"

echo "[*] Fetching historical URLs..."
waybackurls -no-subs "$DOMAIN" | sort -u > historical.txt

echo "[*] Checking current endpoints..."
# waybackurls emits complete URLs, so fetch "$url" as-is (the original
# prefixed "https://" onto already-schemed URLs, breaking every request).
# Match the status line strictly so a stray "200" elsewhere in a header
# cannot inflate the count.
while IFS= read -r url; do
  curl -s -I "$url" 2>/dev/null | head -1
done < historical.txt | grep -cE 'HTTP/[0-9.]+ +200' > current_count.txt

echo "Historical endpoints: $(wc -l < historical.txt)"
echo "Still accessible: $(cat current_count.txt)"

Data Export & Reporting

Generate Reports

#!/bin/bash
# Comprehensive Wayback Machine enumeration report.

DOMAIN="example.com"
REPORT="wayback_report_${DOMAIN}_$(date +%Y%m%d).txt"

# Fetch each data set ONCE and reuse it — the original queried the
# Wayback Machine four separate times for the same domain.
ALL_URLS=$(waybackurls "$DOMAIN")
DATED_URLS=$(waybackurls -dates "$DOMAIN")

TOTAL=$(printf '%s\n' "$ALL_URLS" | wc -l)
UNIQUE=$(printf '%s\n' "$ALL_URLS" | sort -u | wc -l)
DATED=$(printf '%s\n' "$DATED_URLS" | wc -l)

cat > "$REPORT" << EOF
=== WAYBACK MACHINE ENUMERATION REPORT ===
Domain: $DOMAIN
Date: $(date)
Tool: waybackurls

=== STATISTICS ===
Total URLs (with duplicates): $TOTAL
Unique URLs: $UNIQUE
Dated entries: $DATED

=== TOP 20 URLS ===
EOF

printf '%s\n' "$ALL_URLS" | sort -u | head -20 >> "$REPORT"

echo "[+] Report saved to $REPORT"

CSV Export

#!/bin/bash
# Export Wayback results to CSV.

DOMAIN="example.com"
OUTPUT="wayback_$(date +%Y%m%d).csv"

echo "url,first_seen,last_seen" > "$OUTPUT"

# `waybackurls -dates` prints "TIMESTAMP URL". Use read -r so backslashes
# in URLs are kept literal, and name the variable `ts` so it does not
# read as shadowing the `date` command used below.
waybackurls -dates "$DOMAIN" | while read -r ts url; do
  # Strip scheme and host, keeping only the path
  path=$(printf '%s' "$url" | sed "s|.*://[^/]*||")
  # NOTE(review): a path containing a comma would break this naive CSV row
  echo "$path,$ts,$(date +%Y-%m-%d)" >> "$OUTPUT"
done

echo "[+] Exported to $OUTPUT"

Best Practices

  • Run regularly to catch new historical data
  • Combine with current site reconnaissance
  • Look for deprecated endpoints that may still work
  • Search for parameter patterns that might expose vulnerabilities
  • Check for sensitive file exposures (backups, configs)
  • Analyze date ranges for when changes occurred
  • Look for API version patterns
  • Track removed endpoints (may indicate hidden functionality)

Common Issues

No Results

# The domain may simply have no Wayback archives
printf '%s\n' "example.com" | waybackurls

# Retry with capture dates for debugging
printf '%s\n' "example.com" | waybackurls -dates

# Ask archive.org directly whether any snapshot exists
curl -s "https://archive.org/wayback/available?url=example.com"

Too Many Results

# Narrow to a specific path prefix
waybackurls example.com | grep "/api/" | head -100

# Collapse to unique paths (domain stripped)
waybackurls example.com | sed "s|.*://[^/]*||" | sort -u | head -100

# Take a random sample
waybackurls example.com | sort -u | shuf -n 100

Resources


Last updated: 2026-03-30