콘텐츠로 이동

PostgreSQL

Installation

# Ubuntu / Debian
sudo apt install postgresql postgresql-contrib

# macOS (Homebrew)
brew install postgresql@16
brew services start postgresql@16

# Docker
docker run -d \
  --name postgres \
  -e POSTGRES_PASSWORD=secret \
  -p 5432:5432 \
  postgres:16

# Docker Compose
services:
  db:
    image: postgres:16
    environment:
      POSTGRES_DB: myapp
      POSTGRES_USER: myuser
      POSTGRES_PASSWORD: secret
    ports:
      - "5432:5432"
    volumes:
      - pgdata:/var/lib/postgresql/data
volumes:
  pgdata:

Check version:

psql --version
pg_config --version

Configuration

Key configuration files (find with SHOW config_file;):

postgresql.conf — Core Settings

# Connection
listen_addresses = '*'
port = 5432
max_connections = 200

# Memory (tune to 25% of RAM for shared_buffers)
shared_buffers = 4GB
effective_cache_size = 12GB
work_mem = 64MB
maintenance_work_mem = 1GB

# WAL / replication
wal_level = replica
max_wal_senders = 5
wal_keep_size = 1GB

# Logging
log_min_duration_statement = 1000   # log queries > 1s
log_checkpoints = on
log_connections = on
log_lock_waits = on
log_temp_files = 0

# Autovacuum
autovacuum = on
autovacuum_vacuum_scale_factor = 0.05
autovacuum_analyze_scale_factor = 0.02

# Parallelism
max_parallel_workers_per_gather = 4
max_parallel_workers = 8

pg_hba.conf — Access Control

# TYPE  DATABASE  USER      ADDRESS         METHOD
local   all       postgres                  peer
local   all       all                       md5
host    all       all       0.0.0.0/0       scram-sha-256
host    all       all       ::1/128         scram-sha-256

Core Commands

psql Meta-Commands

CommandDescription
psql -U user -d dbnameConnect to database
psql -h host -p 5432 -U user -d dbConnect with host/port
\lList databases
\c dbnameSwitch database
\dtList tables in current schema
\dt schema.*List tables in a schema
\d tablenameDescribe table (columns, indexes)
\diList indexes
\dvList views
\dfList functions
\dnList schemas
\duList roles/users
\timingToggle query timing
\eEdit query in $EDITOR
\i file.sqlExecute SQL file
\o file.txtSend output to file
\copyClient-side COPY (no superuser)
\xToggle expanded display
\watch 2Re-run last query every 2 seconds
\qQuit psql

Database and User Management

CommandDescription
CREATE DATABASE mydb;Create database
DROP DATABASE mydb;Drop database
CREATE USER alice WITH PASSWORD 'pw';Create user
ALTER USER alice WITH SUPERUSER;Grant superuser
GRANT ALL ON DATABASE mydb TO alice;Grant DB access
GRANT SELECT ON ALL TABLES IN SCHEMA public TO alice;Grant read
REVOKE ALL ON TABLE orders FROM alice;Revoke access
DROP USER alice;Drop user
CREATE SCHEMA myschema AUTHORIZATION alice;Create schema
SET search_path TO myschema, public;Set schema search path

DDL — Table Operations

CommandDescription
CREATE TABLE t (...)Create table
ALTER TABLE t ADD COLUMN col type;Add column
ALTER TABLE t DROP COLUMN col;Drop column
ALTER TABLE t ALTER COLUMN col TYPE text;Change type
ALTER TABLE t RENAME COLUMN old TO new;Rename column
ALTER TABLE t ADD CONSTRAINT pk PRIMARY KEY (id);Add PK
ALTER TABLE t ADD CONSTRAINT fk FOREIGN KEY (...)Add FK
TRUNCATE TABLE t;Empty table (fast)
DROP TABLE t CASCADE;Drop table and dependents
CREATE TABLE t2 AS SELECT * FROM t1;Create from query

DML — Data Operations

CommandDescription
INSERT INTO t (a,b) VALUES (1,'x');Insert row
INSERT INTO t SELECT * FROM other;Insert from query
UPDATE t SET col=val WHERE cond;Update rows
DELETE FROM t WHERE cond;Delete rows
UPSERT / INSERT ... ON CONFLICT DO UPDATEUpsert
RETURNING *Return affected rows

Indexes

CommandDescription
CREATE INDEX idx ON t (col);Standard B-tree index
CREATE UNIQUE INDEX ON t (col);Unique index
CREATE INDEX CONCURRENTLY ON t (col);Non-blocking index
CREATE INDEX ON t USING HASH (col);Hash index
CREATE INDEX ON t USING GIN (col);GIN (JSONB, arrays)
CREATE INDEX ON t USING BRIN (col);BRIN (time-series)
CREATE INDEX ON t (a) WHERE active = true;Partial index
CREATE INDEX ON t (lower(email));Functional index
DROP INDEX CONCURRENTLY idx;Drop without locking
REINDEX TABLE t;Rebuild all indexes

Advanced Usage

JSONB Queries

-- Create table with JSONB column
CREATE TABLE events (
  id SERIAL PRIMARY KEY,
  payload JSONB NOT NULL,
  created_at TIMESTAMPTZ DEFAULT NOW()
);

-- Insert JSONB
INSERT INTO events (payload) VALUES
  ('{"type": "login", "user": "alice", "ip": "1.2.3.4"}'),
  ('{"type": "purchase", "amount": 99.99, "items": [1,2,3]}');

-- Query operators
SELECT payload->>'user' AS username FROM events;       -- text value
SELECT payload->'items'->0 AS first_item FROM events;  -- nested
SELECT * FROM events WHERE payload->>'type' = 'login'; -- filter
SELECT * FROM events WHERE payload @> '{"type":"purchase"}'; -- containment
SELECT * FROM events WHERE payload ? 'user';            -- key exists
SELECT * FROM events WHERE payload #>> '{items,0}' = '1'; -- path

-- GIN index for fast JSONB queries
CREATE INDEX ON events USING GIN (payload);
CREATE INDEX ON events USING GIN (payload jsonb_path_ops); -- smaller, @> only

-- Aggregate JSONB
SELECT payload->>'type', COUNT(*) FROM events GROUP BY 1;

-- Update JSONB
UPDATE events SET payload = payload || '{"processed": true}';
UPDATE events SET payload = payload - 'ip'; -- remove key

CTEs (Common Table Expressions)

-- Basic CTE
WITH monthly_sales AS (
  SELECT
    DATE_TRUNC('month', created_at) AS month,
    SUM(amount) AS total
  FROM orders
  GROUP BY 1
)
SELECT month, total, total - LAG(total) OVER (ORDER BY month) AS delta
FROM monthly_sales;

-- Recursive CTE — org hierarchy
WITH RECURSIVE org_tree AS (
  -- Base case: top-level managers
  SELECT id, name, manager_id, 0 AS depth
  FROM employees
  WHERE manager_id IS NULL

  UNION ALL

  -- Recursive step
  SELECT e.id, e.name, e.manager_id, t.depth + 1
  FROM employees e
  JOIN org_tree t ON e.manager_id = t.id
)
SELECT * FROM org_tree ORDER BY depth, name;

-- Writable CTE
WITH deleted AS (
  DELETE FROM events
  WHERE created_at < NOW() - INTERVAL '90 days'
  RETURNING *
)
INSERT INTO archive SELECT * FROM deleted;

Window Functions

-- Rank within groups
SELECT
  department,
  employee,
  salary,
  RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS dept_rank,
  DENSE_RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS dense_rank,
  ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) AS row_num
FROM employees;

-- Running totals and moving average
SELECT
  order_date,
  amount,
  SUM(amount) OVER (ORDER BY order_date) AS running_total,
  AVG(amount) OVER (ORDER BY order_date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) AS moving_avg_7d
FROM orders;

-- Lead/Lag for comparisons
SELECT
  month,
  revenue,
  LAG(revenue, 1) OVER (ORDER BY month) AS prev_month,
  revenue - LAG(revenue, 1) OVER (ORDER BY month) AS change,
  LEAD(revenue, 1) OVER (ORDER BY month) AS next_month
FROM monthly_revenue;

-- NTILE for percentiles
SELECT *, NTILE(4) OVER (ORDER BY score) AS quartile FROM test_scores;

EXPLAIN and Query Analysis

-- Basic explain
EXPLAIN SELECT * FROM orders WHERE user_id = 42;

-- Analyze (actually runs the query)
EXPLAIN ANALYZE SELECT * FROM orders WHERE user_id = 42;

-- Full verbose output
EXPLAIN (ANALYZE, BUFFERS, VERBOSE, FORMAT JSON)
SELECT * FROM orders WHERE user_id = 42;

-- Find slow queries (requires pg_stat_statements)
CREATE EXTENSION IF NOT EXISTS pg_stat_statements;
SELECT query, calls, total_exec_time/calls AS avg_ms, rows/calls AS avg_rows
FROM pg_stat_statements
ORDER BY total_exec_time DESC
LIMIT 20;

-- Missing index candidates
SELECT schemaname, tablename, attname, n_distinct, correlation
FROM pg_stats
WHERE tablename = 'orders' AND attname = 'user_id';

-- Table/index bloat
SELECT
  tablename,
  pg_size_pretty(pg_total_relation_size(tablename::regclass)) AS total_size,
  pg_size_pretty(pg_relation_size(tablename::regclass)) AS table_size,
  pg_size_pretty(pg_indexes_size(tablename::regclass)) AS index_size
FROM pg_tables
WHERE schemaname = 'public'
ORDER BY pg_total_relation_size(tablename::regclass) DESC;

Backup and Restore

# Backup single database (plain SQL)
pg_dump -U postgres -d mydb -f mydb.sql

# Backup in custom format (compressed, supports selective restore)
pg_dump -U postgres -d mydb -Fc -f mydb.dump

# Backup all databases + globals
pg_dumpall -U postgres -f full_backup.sql

# Restore SQL dump
psql -U postgres -d mydb -f mydb.sql

# Restore custom format
pg_restore -U postgres -d mydb -Fc mydb.dump

# Restore specific table
pg_restore -U postgres -d mydb -t orders mydb.dump

# Parallel restore (faster)
pg_restore -U postgres -d mydb -j 4 mydb.dump

# Point-in-time base backup
pg_basebackup -U replicator -D /var/lib/postgresql/backup -Ft -z -P

Useful Extensions

CREATE EXTENSION IF NOT EXISTS pg_stat_statements;  -- query stats
CREATE EXTENSION IF NOT EXISTS pgcrypto;             -- encryption
CREATE EXTENSION IF NOT EXISTS uuid-ossp;            -- UUID generation
CREATE EXTENSION IF NOT EXISTS pg_trgm;              -- fuzzy text search
CREATE EXTENSION IF NOT EXISTS hstore;               -- key-value in column
CREATE EXTENSION IF NOT EXISTS postgis;              -- geospatial
CREATE EXTENSION IF NOT EXISTS timescaledb;          -- time-series
CREATE EXTENSION IF NOT EXISTS pg_partman;           -- partition management

-- UUID primary key
CREATE TABLE users (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  email TEXT UNIQUE NOT NULL
);

-- Fuzzy search with pg_trgm
CREATE INDEX ON products USING GIN (name gin_trgm_ops);
SELECT * FROM products WHERE name % 'postgress' ORDER BY similarity(name, 'postgress') DESC;

Common Workflows

Upsert Pattern

INSERT INTO users (email, name, updated_at)
VALUES ('alice@example.com', 'Alice', NOW())
ON CONFLICT (email) DO UPDATE
  SET name = EXCLUDED.name,
      updated_at = EXCLUDED.updated_at
RETURNING *;

Paginate with Keyset (Cursor) Pagination

-- Faster than OFFSET for large tables
SELECT id, created_at, title
FROM posts
WHERE created_at < '2026-01-01'   -- cursor from last page
ORDER BY created_at DESC, id DESC
LIMIT 20;

Table Partitioning

-- Range partition by month
CREATE TABLE events (
  id BIGSERIAL,
  created_at TIMESTAMPTZ NOT NULL,
  payload JSONB
) PARTITION BY RANGE (created_at);

CREATE TABLE events_2026_01
  PARTITION OF events
  FOR VALUES FROM ('2026-01-01') TO ('2026-02-01');

CREATE TABLE events_2026_02
  PARTITION OF events
  FOR VALUES FROM ('2026-02-01') TO ('2026-03-01');

Monitor Active Queries and Locks

-- Active queries
SELECT pid, now() - query_start AS duration, query, state
FROM pg_stat_activity
WHERE state != 'idle'
ORDER BY duration DESC;

-- Kill a query
SELECT pg_cancel_backend(pid);   -- graceful
SELECT pg_terminate_backend(pid); -- force

-- Lock waits
SELECT blocked.pid, blocked.query, blocking.pid AS blocking_pid
FROM pg_stat_activity blocked
JOIN pg_stat_activity blocking ON blocking.pid = ANY(pg_blocking_pids(blocked.pid))
WHERE cardinality(pg_blocking_pids(blocked.pid)) > 0;

Tips and Best Practices

  • Use BIGSERIAL or gen_random_uuid() for primary keys — SERIAL will hit 2B limit in large tables.
  • Always EXPLAIN ANALYZE before declaring a query slow — the plan reveals missing indexes and bad estimates.
  • CREATE INDEX CONCURRENTLY avoids table locks in production; it takes longer but doesn’t block reads or writes.
  • Partial indexes on frequently-filtered subsets (e.g., WHERE active = true) are much smaller and faster than full-column indexes.
  • work_mem applies per sort/hash operation, not per connection — with many connections, set it modestly globally and use SET LOCAL in sessions that need more.
  • VACUUM and autovacuum keep bloat in check — monitor pg_stat_user_tables.n_dead_tup for tables with high update/delete rates.
  • pg_stat_statements is the first extension to install on any production database — it surfaces the costliest queries.
  • Use CTEs with MATERIALIZED to force evaluation when the optimizer makes poor choices; omit it to let the planner inline the CTE.
  • Connection pooling (PgBouncer, pgpool-II) is essential at scale — PostgreSQL forks a process per connection.
  • JSONB vs JSON — always prefer JSONB; it is stored in binary, supports indexing, and allows operators like @>.
  • Never store passwords in plain text — use pgcrypto’s crypt() or handle hashing in the application layer.
  • Backup with custom format (-Fc) — it supports parallel restore with -j and selective table restore.