sq Cheat Sheet

Overview

sq is a command-line tool for querying, joining, and transforming data from structured data sources including SQL databases (PostgreSQL, MySQL, SQLite, SQL Server), spreadsheets (Excel, CSV, TSV), and document formats (JSON, JSONL, XML). It provides a jq-inspired query language that works uniformly across all these sources, allowing you to query a CSV file with the same syntax as a PostgreSQL database, and even join data across different source types.

sq fills the gap between simple tools like jq (JSON-only) and full database clients by providing a universal query interface for any tabular data. It supports schema inspection, data piping between sources, format conversion, and cross-source joins. This makes it invaluable for quick data exploration, ETL scripting, and ad-hoc analysis where data lives across multiple formats and databases.

Installation

# macOS
brew install neilotoole/sq/sq

# Linux (Debian/Ubuntu)
curl -fsSL https://github.com/neilotoole/sq/releases/latest/download/sq-linux-amd64.deb -o sq.deb
sudo dpkg -i sq.deb

# Linux (RPM)
curl -fsSL https://github.com/neilotoole/sq/releases/latest/download/sq-linux-amd64.rpm -o sq.rpm
sudo rpm -i sq.rpm

# Go install
go install github.com/neilotoole/sq@latest

# Windows (Scoop)
scoop install sq

# Docker
docker pull neilotoole/sq:latest
docker run --rm neilotoole/sq version

# Verify installation
sq version

Core Commands — Source Management

Adding Data Sources

# Add a PostgreSQL database
sq add "postgres://user:password@localhost:5432/mydb" --handle @prod_db

# Add a MySQL database
sq add "mysql://user:password@localhost:3306/mydb" --handle @mysql_db

# Add a SQLite database
sq add "sqlite3:///path/to/database.db" --handle @local_db

# Add a SQL Server database
sq add "sqlserver://user:password@host:1433?database=mydb" --handle @mssql_db

# Add a CSV file
sq add ./data/users.csv --handle @users_csv

# Add an Excel file
sq add ./reports/quarterly.xlsx --handle @q4_report

# Add a JSON file
sq add ./data/events.json --handle @events

# Add a JSONL (newline-delimited JSON) file
sq add ./logs/access.jsonl --handle @access_logs

# Add a TSV file
sq add ./data/export.tsv --handle @export_data

# Add from URL
sq add "https://example.com/data.csv" --handle @remote_csv

# List all sources
sq ls

# Show source details
sq inspect @prod_db

# Set active/default source
sq src @prod_db

# Remove a source
sq rm @old_source

# Rename a source
sq mv @old_name @new_name

Schema Inspection

# Inspect database schema (all tables)
sq inspect @prod_db

# Inspect specific table
sq inspect @prod_db.users

# Show column details
sq inspect @prod_db.users --verbose

# Show table row counts
sq inspect @prod_db --overview

# Inspect a CSV file structure
sq inspect @users_csv

# Inspect Excel workbook (lists sheets)
sq inspect @q4_report

# JSON output for schema
sq inspect @prod_db --json

Core Commands — Querying Data

Basic Queries

# Query a database table
sq @prod_db.users

# Query with column selection
sq ".users | .name, .email, .created_at"

# Query CSV file
sq @users_csv

# Query specific Excel sheet
sq "@q4_report.Sheet1"

# Limit results
sq ".users | .[0:10]"

# Count rows
sq ".users | count()"

# Distinct values
sq ".users | .country | distinct()"

# Sort results
sq ".users | order_by(.created_at)"
sq ".users | order_by(-.created_at)"  # descending

Filtering and Conditions

# Filter rows (WHERE equivalent)
sq ".users | .status == 'active'"

# Multiple conditions
sq ".users | .status == 'active' && .country == 'US'"

# Numeric comparisons
sq ".orders | .total > 100"

# Pattern matching (LIKE)
sq ".users | .email ~= '%@gmail.com'"

# NULL checks
sq ".users | .deleted_at == null"

# IN equivalent
sq ".users | .role in ('admin', 'manager')"

# Combine filter with column selection
sq ".users | .status == 'active' | .name, .email"

# Complex filter
sq ".orders | .status == 'completed' && .total > 50 | .customer_id, .total, .created_at | order_by(-.total) | .[0:20]"

Aggregation

# Count by group
sq ".users | .country, count()"

# Sum and average
sq ".orders | sum(.total)"
sq ".orders | avg(.total)"

# Group by with aggregation
sq ".orders | .status, count(), sum(.total), avg(.total)"

# Min and max
sq ".orders | min(.total), max(.total)"

# Multiple aggregations
sq ".orders | .customer_id, count() as order_count, sum(.total) as total_spent | order_by(-total_spent) | .[0:10]"

Joins

# Join two tables in same database
sq ".users | join(.orders, .id == .orders.user_id) | .users.name, .orders.total"

# Cross-source join (CSV + database)
sq "@users_csv | join(@prod_db.orders, .user_id == .orders.user_id) | .name, .orders.total"

# Join Excel with database
sq "@q4_report.Targets | join(@prod_db.sales, .region == .sales.region) | .region, .target, .sales.actual"

Core Commands — Output Formats

# Default table output
sq ".users | .[0:5]"

# JSON output
sq ".users | .[0:5]" --json
sq ".users | .[0:5]" -j

# JSON array output
sq ".users | .[0:5]" --jsona

# JSONL (newline-delimited JSON)
sq ".users | .[0:5]" --jsonl

# CSV output
sq ".users | .[0:5]" --csv

# TSV output
sq ".users | .[0:5]" --tsv

# Excel output
sq ".users" --xlsx --output users_export.xlsx

# Markdown table
sq ".users | .[0:5]" --markdown

# HTML table
sq ".users | .[0:5]" --html

# Raw (single value)
sq ".users | count()" --raw

# Insert into another database
sq ".users | .status == 'active'" --insert @backup_db.active_users

Configuration

Configuration File

# ~/.config/sq/sq.yml
defaults:
  output:
    format: table
    header: true
  add:
    active: true

sources:
  - handle: "@prod_db"
    driver: postgres
    location: "postgres://user:pass@prod-host:5432/app"
  - handle: "@staging_db"
    driver: postgres
    location: "postgres://user:pass@staging-host:5432/app"

active:
  source: "@prod_db"

Driver Configuration

# Check available drivers
sq driver ls

# Supported drivers:
# postgres    — PostgreSQL
# mysql       — MySQL / MariaDB
# sqlite3     — SQLite
# sqlserver   — Microsoft SQL Server
# csv         — CSV files
# tsv         — TSV files
# json        — JSON files
# jsonl       — JSON Lines
# xlsx        — Excel (.xlsx)
# xml         — XML files

Advanced Usage

Data Pipeline and ETL

# Copy table between databases
sq @prod_db.users --insert @analytics_db.users_copy

# Export database table to CSV
sq @prod_db.users --csv > users_backup.csv

# Import CSV into database
sq @users_csv --insert @staging_db.imported_users

# Transform and load
sq ".orders | .status == 'completed' | .customer_id, sum(.total) as total_spent" \
  --insert @analytics_db.customer_totals

# Convert between formats
sq @data.json --csv > data.csv
sq @data.csv --xlsx --output data.xlsx
sq @spreadsheet.xlsx --json > data.json

# Pipe data between sources
sq @prod_db.users | sq add --handle @piped_users

Scripting and Automation

#!/bin/bash
# daily-report.sh — Generate daily sales report

DATE=$(date -d yesterday +%Y-%m-%d)

# Query daily sales
sq ".orders | .created_at >= '$DATE' && .status == 'completed' | \
  .product_category, count() as orders, sum(.total) as revenue | \
  order_by(-revenue)" \
  --csv > "/reports/daily-sales-${DATE}.csv"

# Cross-reference with targets
sq "@/reports/daily-sales-${DATE}.csv | \
  join(@targets.xlsx.Q2, .product_category == .Q2.category) | \
  .product_category, .orders, .revenue, .Q2.target, \
  (.revenue / .Q2.target * 100) as pct_of_target" \
  --xlsx --output "/reports/daily-report-${DATE}.xlsx"

echo "Report generated for $DATE"

Complex Queries

# Subquery pattern
sq ".users | .id in (select .user_id from .orders where .total > 1000) | .name, .email"

# Window functions (on supported databases)
sq "select name, total, rank() over (order by total desc) as rank from orders limit 20"

# Raw SQL passthrough
sq sql @prod_db "SELECT u.name, COUNT(o.id) as order_count FROM users u LEFT JOIN orders o ON u.id = o.user_id GROUP BY u.name ORDER BY order_count DESC LIMIT 10"

# Multiple queries in script
sq sql @prod_db "
  CREATE TEMP TABLE active_users AS SELECT * FROM users WHERE status = 'active';
  SELECT country, COUNT(*) FROM active_users GROUP BY country ORDER BY count DESC;
"

Troubleshooting

Issue	Cause	Solution
Connection refused	Database not running or wrong host/port	Verify database is accessible and connection string is correct
Permission denied on file	File permissions	Check file read permissions: `ls -la file.csv`
CSV parsing errors	Wrong delimiter or encoding	Try `sq add file.csv --opts.delim=";"` or check encoding
Slow queries on large files	No indexing for flat files	Use database source for large datasets, or limit results
Excel sheet not found	Wrong sheet name	Use `sq inspect @file.xlsx` to see available sheets
Join producing no results	Column type mismatch	Ensure join columns have matching types
JSON nested objects	sq expects flat tabular data	Use `jq` to flatten first, then pipe to sq
Out of memory	Large file loaded entirely	Use `--limit` or filter early in the query

# Debug: verbose output
sq ".users | .[0:3]" --verbose

# Debug: show generated SQL
sq ".users | .status == 'active'" --verbose 2>&1 | grep SQL

# Test source connectivity
sq ping @prod_db

# Show sq configuration
sq config ls

# Reset configuration
sq config set defaults.output.format table