Installation
# Ubuntu / Debian
sudo apt install postgresql postgresql-contrib
# macOS (Homebrew)
brew install postgresql@16
brew services start postgresql@16
# Docker
docker run -d \
--name postgres \
-e POSTGRES_PASSWORD=secret \
-p 5432:5432 \
postgres:16
# Docker Compose
services:
db:
image: postgres:16
environment:
POSTGRES_DB: myapp
POSTGRES_USER: myuser
POSTGRES_PASSWORD: secret
ports:
- "5432:5432"
volumes:
- pgdata:/var/lib/postgresql/data
volumes:
pgdata:
Check version:
psql --version
pg_config --version
Configuration
Key configuration files (find with SHOW config_file;):
postgresql.conf — Core Settings
# Connection
listen_addresses = '*'
port = 5432
max_connections = 200
# Memory (tune to 25% of RAM for shared_buffers)
shared_buffers = 4GB
effective_cache_size = 12GB
work_mem = 64MB
maintenance_work_mem = 1GB
# WAL / replication
wal_level = replica
max_wal_senders = 5
wal_keep_size = 1GB
# Logging
log_min_duration_statement = 1000 # log queries > 1s
log_checkpoints = on
log_connections = on
log_lock_waits = on
log_temp_files = 0
# Autovacuum
autovacuum = on
autovacuum_vacuum_scale_factor = 0.05
autovacuum_analyze_scale_factor = 0.02
# Parallelism
max_parallel_workers_per_gather = 4
max_parallel_workers = 8
pg_hba.conf — Access Control
# TYPE DATABASE USER ADDRESS METHOD
local all postgres peer
local all all md5
host all all 0.0.0.0/0 scram-sha-256
host all all ::1/128 scram-sha-256
Core Commands
| Command | Description |
|---|
psql -U user -d dbname | Connect to database |
psql -h host -p 5432 -U user -d db | Connect with host/port |
\l | List databases |
\c dbname | Switch database |
\dt | List tables in current schema |
\dt schema.* | List tables in a schema |
\d tablename | Describe table (columns, indexes) |
\di | List indexes |
\dv | List views |
\df | List functions |
\dn | List schemas |
\du | List roles/users |
\timing | Toggle query timing |
\e | Edit query in $EDITOR |
\i file.sql | Execute SQL file |
\o file.txt | Send output to file |
\copy | Client-side COPY (no superuser) |
\x | Toggle expanded display |
\watch 2 | Re-run last query every 2 seconds |
\q | Quit psql |
Database and User Management
| Command | Description |
|---|
CREATE DATABASE mydb; | Create database |
DROP DATABASE mydb; | Drop database |
CREATE USER alice WITH PASSWORD 'pw'; | Create user |
ALTER USER alice WITH SUPERUSER; | Grant superuser |
GRANT ALL ON DATABASE mydb TO alice; | Grant DB access |
GRANT SELECT ON ALL TABLES IN SCHEMA public TO alice; | Grant read |
REVOKE ALL ON TABLE orders FROM alice; | Revoke access |
DROP USER alice; | Drop user |
CREATE SCHEMA myschema AUTHORIZATION alice; | Create schema |
SET search_path TO myschema, public; | Set schema search path |
DDL — Table Operations
| Command | Description |
|---|
CREATE TABLE t (...) | Create table |
ALTER TABLE t ADD COLUMN col type; | Add column |
ALTER TABLE t DROP COLUMN col; | Drop column |
ALTER TABLE t ALTER COLUMN col TYPE text; | Change type |
ALTER TABLE t RENAME COLUMN old TO new; | Rename column |
ALTER TABLE t ADD CONSTRAINT pk PRIMARY KEY (id); | Add PK |
ALTER TABLE t ADD CONSTRAINT fk FOREIGN KEY (...) | Add FK |
TRUNCATE TABLE t; | Empty table (fast) |
DROP TABLE t CASCADE; | Drop table and dependents |
CREATE TABLE t2 AS SELECT * FROM t1; | Create from query |
DML — Data Operations
| Command | Description |
|---|
INSERT INTO t (a,b) VALUES (1,'x'); | Insert row |
INSERT INTO t SELECT * FROM other; | Insert from query |
UPDATE t SET col=val WHERE cond; | Update rows |
DELETE FROM t WHERE cond; | Delete rows |
UPSERT / INSERT ... ON CONFLICT DO UPDATE | Upsert |
RETURNING * | Return affected rows |
Indexes
| Command | Description |
|---|
CREATE INDEX idx ON t (col); | Standard B-tree index |
CREATE UNIQUE INDEX ON t (col); | Unique index |
CREATE INDEX CONCURRENTLY ON t (col); | Non-blocking index |
CREATE INDEX ON t USING HASH (col); | Hash index |
CREATE INDEX ON t USING GIN (col); | GIN (JSONB, arrays) |
CREATE INDEX ON t USING BRIN (col); | BRIN (time-series) |
CREATE INDEX ON t (a) WHERE active = true; | Partial index |
CREATE INDEX ON t (lower(email)); | Functional index |
DROP INDEX CONCURRENTLY idx; | Drop without locking |
REINDEX TABLE t; | Rebuild all indexes |
Advanced Usage
JSONB Queries
-- Create table with JSONB column
CREATE TABLE events (
id SERIAL PRIMARY KEY,
payload JSONB NOT NULL,
created_at TIMESTAMPTZ DEFAULT NOW()
);
-- Insert JSONB
INSERT INTO events (payload) VALUES
('{"type": "login", "user": "alice", "ip": "1.2.3.4"}'),
('{"type": "purchase", "amount": 99.99, "items": [1,2,3]}');
-- Query operators
SELECT payload->>'user' AS username FROM events; -- text value
SELECT payload->'items'->0 AS first_item FROM events; -- nested
SELECT * FROM events WHERE payload->>'type' = 'login'; -- filter
SELECT * FROM events WHERE payload @> '{"type":"purchase"}'; -- containment
SELECT * FROM events WHERE payload ? 'user'; -- key exists
SELECT * FROM events WHERE payload #>> '{items,0}' = '1'; -- path
-- GIN index for fast JSONB queries
CREATE INDEX ON events USING GIN (payload);
CREATE INDEX ON events USING GIN (payload jsonb_path_ops); -- smaller, @> only
-- Aggregate JSONB
SELECT payload->>'type', COUNT(*) FROM events GROUP BY 1;
-- Update JSONB
UPDATE events SET payload = payload || '{"processed": true}';
UPDATE events SET payload = payload - 'ip'; -- remove key
CTEs (Common Table Expressions)
-- Basic CTE
WITH monthly_sales AS (
SELECT
DATE_TRUNC('month', created_at) AS month,
SUM(amount) AS total
FROM orders
GROUP BY 1
)
SELECT month, total, total - LAG(total) OVER (ORDER BY month) AS delta
FROM monthly_sales;
-- Recursive CTE — org hierarchy
WITH RECURSIVE org_tree AS (
-- Base case: top-level managers
SELECT id, name, manager_id, 0 AS depth
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- Recursive step
SELECT e.id, e.name, e.manager_id, t.depth + 1
FROM employees e
JOIN org_tree t ON e.manager_id = t.id
)
SELECT * FROM org_tree ORDER BY depth, name;
-- Writable CTE
WITH deleted AS (
DELETE FROM events
WHERE created_at < NOW() - INTERVAL '90 days'
RETURNING *
)
INSERT INTO archive SELECT * FROM deleted;
Window Functions
-- Rank within groups
SELECT
department,
employee,
salary,
RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS dept_rank,
DENSE_RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS dense_rank,
ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) AS row_num
FROM employees;
-- Running totals and moving average
SELECT
order_date,
amount,
SUM(amount) OVER (ORDER BY order_date) AS running_total,
AVG(amount) OVER (ORDER BY order_date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) AS moving_avg_7d
FROM orders;
-- Lead/Lag for comparisons
SELECT
month,
revenue,
LAG(revenue, 1) OVER (ORDER BY month) AS prev_month,
revenue - LAG(revenue, 1) OVER (ORDER BY month) AS change,
LEAD(revenue, 1) OVER (ORDER BY month) AS next_month
FROM monthly_revenue;
-- NTILE for percentiles
SELECT *, NTILE(4) OVER (ORDER BY score) AS quartile FROM test_scores;
EXPLAIN and Query Analysis
-- Basic explain
EXPLAIN SELECT * FROM orders WHERE user_id = 42;
-- Analyze (actually runs the query)
EXPLAIN ANALYZE SELECT * FROM orders WHERE user_id = 42;
-- Full verbose output
EXPLAIN (ANALYZE, BUFFERS, VERBOSE, FORMAT JSON)
SELECT * FROM orders WHERE user_id = 42;
-- Find slow queries (requires pg_stat_statements)
CREATE EXTENSION IF NOT EXISTS pg_stat_statements;
SELECT query, calls, total_exec_time/calls AS avg_ms, rows/calls AS avg_rows
FROM pg_stat_statements
ORDER BY total_exec_time DESC
LIMIT 20;
-- Missing index candidates
SELECT schemaname, tablename, attname, n_distinct, correlation
FROM pg_stats
WHERE tablename = 'orders' AND attname = 'user_id';
-- Table/index bloat
SELECT
tablename,
pg_size_pretty(pg_total_relation_size(tablename::regclass)) AS total_size,
pg_size_pretty(pg_relation_size(tablename::regclass)) AS table_size,
pg_size_pretty(pg_indexes_size(tablename::regclass)) AS index_size
FROM pg_tables
WHERE schemaname = 'public'
ORDER BY pg_total_relation_size(tablename::regclass) DESC;
Backup and Restore
# Backup single database (plain SQL)
pg_dump -U postgres -d mydb -f mydb.sql
# Backup in custom format (compressed, supports selective restore)
pg_dump -U postgres -d mydb -Fc -f mydb.dump
# Backup all databases + globals
pg_dumpall -U postgres -f full_backup.sql
# Restore SQL dump
psql -U postgres -d mydb -f mydb.sql
# Restore custom format
pg_restore -U postgres -d mydb -Fc mydb.dump
# Restore specific table
pg_restore -U postgres -d mydb -t orders mydb.dump
# Parallel restore (faster)
pg_restore -U postgres -d mydb -j 4 mydb.dump
# Point-in-time base backup
pg_basebackup -U replicator -D /var/lib/postgresql/backup -Ft -z -P
Useful Extensions
CREATE EXTENSION IF NOT EXISTS pg_stat_statements; -- query stats
CREATE EXTENSION IF NOT EXISTS pgcrypto; -- encryption
CREATE EXTENSION IF NOT EXISTS uuid-ossp; -- UUID generation
CREATE EXTENSION IF NOT EXISTS pg_trgm; -- fuzzy text search
CREATE EXTENSION IF NOT EXISTS hstore; -- key-value in column
CREATE EXTENSION IF NOT EXISTS postgis; -- geospatial
CREATE EXTENSION IF NOT EXISTS timescaledb; -- time-series
CREATE EXTENSION IF NOT EXISTS pg_partman; -- partition management
-- UUID primary key
CREATE TABLE users (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
email TEXT UNIQUE NOT NULL
);
-- Fuzzy search with pg_trgm
CREATE INDEX ON products USING GIN (name gin_trgm_ops);
SELECT * FROM products WHERE name % 'postgress' ORDER BY similarity(name, 'postgress') DESC;
Common Workflows
Upsert Pattern
INSERT INTO users (email, name, updated_at)
VALUES ('alice@example.com', 'Alice', NOW())
ON CONFLICT (email) DO UPDATE
SET name = EXCLUDED.name,
updated_at = EXCLUDED.updated_at
RETURNING *;
-- Faster than OFFSET for large tables
SELECT id, created_at, title
FROM posts
WHERE created_at < '2026-01-01' -- cursor from last page
ORDER BY created_at DESC, id DESC
LIMIT 20;
Table Partitioning
-- Range partition by month
CREATE TABLE events (
id BIGSERIAL,
created_at TIMESTAMPTZ NOT NULL,
payload JSONB
) PARTITION BY RANGE (created_at);
CREATE TABLE events_2026_01
PARTITION OF events
FOR VALUES FROM ('2026-01-01') TO ('2026-02-01');
CREATE TABLE events_2026_02
PARTITION OF events
FOR VALUES FROM ('2026-02-01') TO ('2026-03-01');
Monitor Active Queries and Locks
-- Active queries
SELECT pid, now() - query_start AS duration, query, state
FROM pg_stat_activity
WHERE state != 'idle'
ORDER BY duration DESC;
-- Kill a query
SELECT pg_cancel_backend(pid); -- graceful
SELECT pg_terminate_backend(pid); -- force
-- Lock waits
SELECT blocked.pid, blocked.query, blocking.pid AS blocking_pid
FROM pg_stat_activity blocked
JOIN pg_stat_activity blocking ON blocking.pid = ANY(pg_blocking_pids(blocked.pid))
WHERE cardinality(pg_blocking_pids(blocked.pid)) > 0;
Tips and Best Practices
- Use
BIGSERIAL or gen_random_uuid() for primary keys — SERIAL will hit 2B limit in large tables.
- Always
EXPLAIN ANALYZE before declaring a query slow — the plan reveals missing indexes and bad estimates.
CREATE INDEX CONCURRENTLY avoids table locks in production; it takes longer but doesn’t block reads or writes.
- Partial indexes on frequently-filtered subsets (e.g.,
WHERE active = true) are much smaller and faster than full-column indexes.
work_mem applies per sort/hash operation, not per connection — with many connections, set it modestly globally and use SET LOCAL in sessions that need more.
- VACUUM and autovacuum keep bloat in check — monitor
pg_stat_user_tables.n_dead_tup for tables with high update/delete rates.
pg_stat_statements is the first extension to install on any production database — it surfaces the costliest queries.
- Use CTEs with
MATERIALIZED to force evaluation when the optimizer makes poor choices; omit it to let the planner inline the CTE.
- Connection pooling (PgBouncer, pgpool-II) is essential at scale — PostgreSQL forks a process per connection.
- JSONB vs JSON — always prefer
JSONB; it is stored in binary, supports indexing, and allows operators like @>.
- Never store passwords in plain text — use
pgcrypto’s crypt() or handle hashing in the application layer.
- Backup with custom format (
-Fc) — it supports parallel restore with -j and selective table restore.