Overview
CockroachDB is a cloud-native distributed SQL database designed to build, scale, and manage modern data-intensive applications. It provides PostgreSQL compatibility, automatic sharding and replication, serializable ACID transactions, and multi-region deployment capabilities. The database is designed to survive disk, machine, rack, and even datacenter failures with minimal latency disruption and no manual intervention.
Created by ex-Google engineers who worked on Spanner and HBase, CockroachDB brings Google Spanner-like global consistency to an open-source, PostgreSQL-compatible database. It uses the Raft consensus protocol for replication, range-based partitioning for data distribution, and a distributed SQL query engine. CockroachDB is used by companies like Netflix, Bose, and DoorDash for applications requiring high availability and geo-distributed data.
Installation
Docker
# Single node (development)
docker run -d --name cockroach \
-p 26257:26257 -p 8080:8080 \
cockroachdb/cockroach:latest start-single-node \
--insecure
# Connect with SQL client
docker exec -it cockroach ./cockroach sql --insecure
Binary Installation
# macOS
brew install cockroachdb/tap/cockroach
# Linux
curl https://binaries.cockroachdb.com/cockroach-latest.linux-amd64.tgz | tar -xz
sudo cp cockroach-*/cockroach /usr/local/bin/
# Start single-node cluster
cockroach start-single-node --insecure --listen-addr=localhost:26257 --http-addr=localhost:8080 --store=cockroach-data --background
# Connect
cockroach sql --insecure
Multi-Node Cluster
# Node 1
cockroach start --insecure --listen-addr=node1:26257 \
--join=node1:26257,node2:26257,node3:26257
# Node 2
cockroach start --insecure --listen-addr=node2:26257 \
--join=node1:26257,node2:26257,node3:26257
# Node 3
cockroach start --insecure --listen-addr=node3:26257 \
--join=node1:26257,node2:26257,node3:26257
# Initialize cluster (run once)
cockroach init --insecure --host=node1:26257
SQL Basics
Database and Schema
-- Create database
CREATE DATABASE myapp;
USE myapp;
-- Create tables (PostgreSQL compatible)
CREATE TABLE users (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
email STRING NOT NULL UNIQUE,
name STRING NOT NULL,
age INT,
active BOOL DEFAULT true,
metadata JSONB,
created_at TIMESTAMPTZ DEFAULT now(),
updated_at TIMESTAMPTZ DEFAULT now()
);
CREATE TABLE posts (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
title STRING NOT NULL,
body STRING NOT NULL,
published BOOL DEFAULT false,
author_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
created_at TIMESTAMPTZ DEFAULT now(),
INDEX idx_posts_author (author_id),
INDEX idx_posts_created (created_at DESC)
);
-- Add column
ALTER TABLE users ADD COLUMN role STRING DEFAULT 'user';
-- Create index
CREATE INDEX idx_users_email ON users (email);
CREATE INVERTED INDEX idx_users_metadata ON users (metadata);
Data Types
| Type | Description | Example |
|---|
INT | 64-bit integer | 42 |
FLOAT | 64-bit float | 3.14 |
DECIMAL | Arbitrary precision | DECIMAL(10,2) |
STRING | Variable-length text | 'hello' |
BOOL | Boolean | true |
UUID | 128-bit identifier | gen_random_uuid() |
TIMESTAMPTZ | Timestamp with timezone | now() |
DATE | Date without time | '2024-01-01' |
INTERVAL | Time duration | INTERVAL '1 hour' |
JSONB | Binary JSON | '{"key": "value"}' |
BYTES | Binary data | b'\x00\xff' |
ARRAY | Array type | ARRAY[1, 2, 3] |
INET | IP address | '192.168.1.1' |
CRUD Operations
-- Insert
INSERT INTO users (email, name, age, metadata)
VALUES ('alice@example.com', 'Alice', 30, '{"theme": "dark"}')
RETURNING id, created_at;
-- Upsert
UPSERT INTO users (id, email, name)
VALUES ('uuid-here', 'alice@example.com', 'Alice Updated');
-- Insert on conflict
INSERT INTO users (email, name) VALUES ('alice@example.com', 'Alice')
ON CONFLICT (email) DO UPDATE SET name = excluded.name, updated_at = now();
-- Select with joins
SELECT u.name, p.title, p.created_at
FROM users u
JOIN posts p ON u.id = p.author_id
WHERE p.published = true
ORDER BY p.created_at DESC
LIMIT 10;
-- JSONB queries
SELECT * FROM users WHERE metadata->>'theme' = 'dark';
SELECT * FROM users WHERE metadata @> '{"notifications": true}';
-- Window functions
SELECT
name,
age,
RANK() OVER (ORDER BY age DESC) AS age_rank,
AVG(age) OVER () AS avg_age
FROM users;
-- Update
UPDATE users SET name = 'Alice Smith', updated_at = now()
WHERE email = 'alice@example.com'
RETURNING *;
-- Delete
DELETE FROM users WHERE active = false RETURNING id, email;
-- Common Table Expression (CTE)
WITH active_authors AS (
SELECT DISTINCT author_id FROM posts WHERE published = true
)
SELECT u.name, u.email
FROM users u
JOIN active_authors aa ON u.id = aa.author_id;
CLI Commands
| Command | Description |
|---|
cockroach sql | Open SQL shell |
cockroach node status | Show cluster node status |
cockroach node ls | List cluster nodes |
cockroach quit | Gracefully shut down node |
cockroach dump mydb | Export database |
cockroach import | Import data |
cockroach debug zip | Generate debug archive |
cockroach gen haproxy | Generate HAProxy config |
cockroach workload init | Initialize benchmark workload |
Configuration
Cluster Settings
-- View settings
SHOW CLUSTER SETTINGS;
-- Change settings
SET CLUSTER SETTING server.time_until_store_dead = '5m';
SET CLUSTER SETTING kv.range_merge.queue_enabled = true;
SET CLUSTER SETTING sql.stats.automatic_collection.enabled = true;
-- Session settings
SET timezone = 'UTC';
SET default_transaction_isolation = 'serializable';
SET statement_timeout = '30s';
Connection Strings
# PostgreSQL compatible
postgresql://root@localhost:26257/myapp?sslmode=disable
# With authentication
postgresql://user:password@localhost:26257/myapp?sslmode=verify-full&sslrootcert=certs/ca.crt
# CockroachDB Cloud
postgresql://user:pass@free-tier.gcp-us-central1.cockroachlabs.cloud:26257/mydb?sslmode=verify-full
Advanced Usage
Multi-Region Configuration
-- Add regions to database
ALTER DATABASE myapp PRIMARY REGION "us-east1";
ALTER DATABASE myapp ADD REGION "us-west1";
ALTER DATABASE myapp ADD REGION "eu-west1";
-- Regional by row
ALTER TABLE users ADD COLUMN crdb_region crdb_internal_region
AS (CASE
WHEN country IN ('US', 'CA') THEN 'us-east1'
WHEN country IN ('UK', 'DE', 'FR') THEN 'eu-west1'
ELSE 'us-west1'
END) STORED;
ALTER TABLE users SET LOCALITY REGIONAL BY ROW AS crdb_region;
-- Global table (read from any region)
ALTER TABLE config SET LOCALITY GLOBAL;
-- Regional table (pinned to primary region)
ALTER TABLE users SET LOCALITY REGIONAL BY TABLE IN PRIMARY REGION;
Transactions
-- Explicit transaction
BEGIN;
INSERT INTO users (email, name) VALUES ('bob@example.com', 'Bob');
INSERT INTO posts (title, body, author_id) VALUES ('Post', 'Body', (SELECT id FROM users WHERE email = 'bob@example.com'));
COMMIT;
-- With retry logic (application code)
-- CockroachDB may ask to retry serializable transactions
BEGIN;
SAVEPOINT cockroach_restart;
-- ... statements ...
RELEASE SAVEPOINT cockroach_restart;
COMMIT;
Changefeeds (CDC)
-- Create changefeed to Kafka
CREATE CHANGEFEED FOR TABLE users
INTO 'kafka://broker:9092?topic_prefix=crdb_'
WITH updated, resolved = '10s';
-- Changefeed to cloud storage
CREATE CHANGEFEED FOR TABLE users, posts
INTO 's3://bucket/path?AWS_ACCESS_KEY_ID=key&AWS_SECRET_ACCESS_KEY=secret'
WITH updated, resolved = '1m', format = json;
-- Webhook changefeed
CREATE CHANGEFEED FOR TABLE users
INTO 'webhook-https://api.example.com/webhook'
WITH updated;
Backup and Restore
-- Full backup
BACKUP DATABASE myapp INTO 's3://bucket/backup'
WITH revision_history;
-- Incremental backup
BACKUP DATABASE myapp INTO LATEST IN 's3://bucket/backup';
-- Scheduled backup
CREATE SCHEDULE daily_backup
FOR BACKUP DATABASE myapp INTO 's3://bucket/backup'
RECURRING '@daily'
FULL BACKUP '@weekly';
-- Restore
RESTORE DATABASE myapp FROM LATEST IN 's3://bucket/backup';
Monitoring
-- Active queries
SELECT * FROM [SHOW STATEMENTS];
-- Query statistics
SELECT * FROM crdb_internal.node_statement_statistics
ORDER BY count DESC LIMIT 10;
-- Range distribution
SELECT * FROM [SHOW RANGES FROM TABLE users];
-- Cluster health
SELECT * FROM crdb_internal.gossip_nodes;
Troubleshooting
| Problem | Solution |
|---|
| Transaction retry errors | Implement retry loops with SAVEPOINT cockroach_restart |
| Slow queries | Run EXPLAIN ANALYZE; check indexes; look at DB Console |
| Hot ranges | Check range distribution; add more columns to primary key |
| Node not joining | Verify --join addresses; check network connectivity |
| Certificate errors | Regenerate certs with cockroach cert create-* commands |
| Memory issues | Set --max-sql-memory and --cache flags appropriately |
| Replication lag | Check system.replication_stats; ensure nodes are healthy |
| Schema change stuck | Check SHOW JOBS; cancel with CANCEL JOB <id> |