Ir al contenido

CockroachDB Cheat Sheet

Overview

CockroachDB is a cloud-native distributed SQL database designed to build, scale, and manage modern data-intensive applications. It provides PostgreSQL compatibility, automatic sharding and replication, serializable ACID transactions, and multi-region deployment capabilities. The database is designed to survive disk, machine, rack, and even datacenter failures with minimal latency disruption and no manual intervention.

Created by ex-Google engineers who worked on Spanner and HBase, CockroachDB brings Google Spanner-like global consistency to an open-source, PostgreSQL-compatible database. It uses the Raft consensus protocol for replication, range-based partitioning for data distribution, and a distributed SQL query engine. CockroachDB is used by companies like Netflix, Bose, and DoorDash for applications requiring high availability and geo-distributed data.

Installation

Docker

# Single node (development)
docker run -d --name cockroach \
  -p 26257:26257 -p 8080:8080 \
  cockroachdb/cockroach:latest start-single-node \
  --insecure

# Connect with SQL client
docker exec -it cockroach ./cockroach sql --insecure

Binary Installation

# macOS
brew install cockroachdb/tap/cockroach

# Linux
curl https://binaries.cockroachdb.com/cockroach-latest.linux-amd64.tgz | tar -xz
sudo cp cockroach-*/cockroach /usr/local/bin/

# Start single-node cluster
cockroach start-single-node --insecure --listen-addr=localhost:26257 --http-addr=localhost:8080 --store=cockroach-data --background

# Connect
cockroach sql --insecure

Multi-Node Cluster

# Node 1
cockroach start --insecure --listen-addr=node1:26257 \
  --join=node1:26257,node2:26257,node3:26257

# Node 2
cockroach start --insecure --listen-addr=node2:26257 \
  --join=node1:26257,node2:26257,node3:26257

# Node 3
cockroach start --insecure --listen-addr=node3:26257 \
  --join=node1:26257,node2:26257,node3:26257

# Initialize cluster (run once)
cockroach init --insecure --host=node1:26257

SQL Basics

Database and Schema

-- Create database
CREATE DATABASE myapp;
USE myapp;

-- Create tables (PostgreSQL compatible)
CREATE TABLE users (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    email STRING NOT NULL UNIQUE,
    name STRING NOT NULL,
    age INT,
    active BOOL DEFAULT true,
    metadata JSONB,
    created_at TIMESTAMPTZ DEFAULT now(),
    updated_at TIMESTAMPTZ DEFAULT now()
);

CREATE TABLE posts (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    title STRING NOT NULL,
    body STRING NOT NULL,
    published BOOL DEFAULT false,
    author_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    created_at TIMESTAMPTZ DEFAULT now(),
    INDEX idx_posts_author (author_id),
    INDEX idx_posts_created (created_at DESC)
);

-- Add column
ALTER TABLE users ADD COLUMN role STRING DEFAULT 'user';

-- Create index
CREATE INDEX idx_users_email ON users (email);
CREATE INVERTED INDEX idx_users_metadata ON users (metadata);

Data Types

TypeDescriptionExample
INT64-bit integer42
FLOAT64-bit float3.14
DECIMALArbitrary precisionDECIMAL(10,2)
STRINGVariable-length text'hello'
BOOLBooleantrue
UUID128-bit identifiergen_random_uuid()
TIMESTAMPTZTimestamp with timezonenow()
DATEDate without time'2024-01-01'
INTERVALTime durationINTERVAL '1 hour'
JSONBBinary JSON'{"key": "value"}'
BYTESBinary datab'\x00\xff'
ARRAYArray typeARRAY[1, 2, 3]
INETIP address'192.168.1.1'

CRUD Operations

-- Insert
INSERT INTO users (email, name, age, metadata)
VALUES ('alice@example.com', 'Alice', 30, '{"theme": "dark"}')
RETURNING id, created_at;

-- Upsert
UPSERT INTO users (id, email, name)
VALUES ('uuid-here', 'alice@example.com', 'Alice Updated');

-- Insert on conflict
INSERT INTO users (email, name) VALUES ('alice@example.com', 'Alice')
ON CONFLICT (email) DO UPDATE SET name = excluded.name, updated_at = now();

-- Select with joins
SELECT u.name, p.title, p.created_at
FROM users u
JOIN posts p ON u.id = p.author_id
WHERE p.published = true
ORDER BY p.created_at DESC
LIMIT 10;

-- JSONB queries
SELECT * FROM users WHERE metadata->>'theme' = 'dark';
SELECT * FROM users WHERE metadata @> '{"notifications": true}';

-- Window functions
SELECT
    name,
    age,
    RANK() OVER (ORDER BY age DESC) AS age_rank,
    AVG(age) OVER () AS avg_age
FROM users;

-- Update
UPDATE users SET name = 'Alice Smith', updated_at = now()
WHERE email = 'alice@example.com'
RETURNING *;

-- Delete
DELETE FROM users WHERE active = false RETURNING id, email;

-- Common Table Expression (CTE)
WITH active_authors AS (
    SELECT DISTINCT author_id FROM posts WHERE published = true
)
SELECT u.name, u.email
FROM users u
JOIN active_authors aa ON u.id = aa.author_id;

CLI Commands

CommandDescription
cockroach sqlOpen SQL shell
cockroach node statusShow cluster node status
cockroach node lsList cluster nodes
cockroach quitGracefully shut down node
cockroach dump mydbExport database
cockroach importImport data
cockroach debug zipGenerate debug archive
cockroach gen haproxyGenerate HAProxy config
cockroach workload initInitialize benchmark workload

Configuration

Cluster Settings

-- View settings
SHOW CLUSTER SETTINGS;

-- Change settings
SET CLUSTER SETTING server.time_until_store_dead = '5m';
SET CLUSTER SETTING kv.range_merge.queue_enabled = true;
SET CLUSTER SETTING sql.stats.automatic_collection.enabled = true;

-- Session settings
SET timezone = 'UTC';
SET default_transaction_isolation = 'serializable';
SET statement_timeout = '30s';

Connection Strings

# PostgreSQL compatible
postgresql://root@localhost:26257/myapp?sslmode=disable

# With authentication
postgresql://user:password@localhost:26257/myapp?sslmode=verify-full&sslrootcert=certs/ca.crt

# CockroachDB Cloud
postgresql://user:pass@free-tier.gcp-us-central1.cockroachlabs.cloud:26257/mydb?sslmode=verify-full

Advanced Usage

Multi-Region Configuration

-- Add regions to database
ALTER DATABASE myapp PRIMARY REGION "us-east1";
ALTER DATABASE myapp ADD REGION "us-west1";
ALTER DATABASE myapp ADD REGION "eu-west1";

-- Regional by row
ALTER TABLE users ADD COLUMN crdb_region crdb_internal_region
  AS (CASE
    WHEN country IN ('US', 'CA') THEN 'us-east1'
    WHEN country IN ('UK', 'DE', 'FR') THEN 'eu-west1'
    ELSE 'us-west1'
  END) STORED;
ALTER TABLE users SET LOCALITY REGIONAL BY ROW AS crdb_region;

-- Global table (read from any region)
ALTER TABLE config SET LOCALITY GLOBAL;

-- Regional table (pinned to primary region)
ALTER TABLE users SET LOCALITY REGIONAL BY TABLE IN PRIMARY REGION;

Transactions

-- Explicit transaction
BEGIN;
INSERT INTO users (email, name) VALUES ('bob@example.com', 'Bob');
INSERT INTO posts (title, body, author_id) VALUES ('Post', 'Body', (SELECT id FROM users WHERE email = 'bob@example.com'));
COMMIT;

-- With retry logic (application code)
-- CockroachDB may ask to retry serializable transactions
BEGIN;
SAVEPOINT cockroach_restart;
-- ... statements ...
RELEASE SAVEPOINT cockroach_restart;
COMMIT;

Changefeeds (CDC)

-- Create changefeed to Kafka
CREATE CHANGEFEED FOR TABLE users
  INTO 'kafka://broker:9092?topic_prefix=crdb_'
  WITH updated, resolved = '10s';

-- Changefeed to cloud storage
CREATE CHANGEFEED FOR TABLE users, posts
  INTO 's3://bucket/path?AWS_ACCESS_KEY_ID=key&AWS_SECRET_ACCESS_KEY=secret'
  WITH updated, resolved = '1m', format = json;

-- Webhook changefeed
CREATE CHANGEFEED FOR TABLE users
  INTO 'webhook-https://api.example.com/webhook'
  WITH updated;

Backup and Restore

-- Full backup
BACKUP DATABASE myapp INTO 's3://bucket/backup'
  WITH revision_history;

-- Incremental backup
BACKUP DATABASE myapp INTO LATEST IN 's3://bucket/backup';

-- Scheduled backup
CREATE SCHEDULE daily_backup
  FOR BACKUP DATABASE myapp INTO 's3://bucket/backup'
  RECURRING '@daily'
  FULL BACKUP '@weekly';

-- Restore
RESTORE DATABASE myapp FROM LATEST IN 's3://bucket/backup';

Monitoring

-- Active queries
SELECT * FROM [SHOW STATEMENTS];

-- Query statistics
SELECT * FROM crdb_internal.node_statement_statistics
ORDER BY count DESC LIMIT 10;

-- Range distribution
SELECT * FROM [SHOW RANGES FROM TABLE users];

-- Cluster health
SELECT * FROM crdb_internal.gossip_nodes;

Troubleshooting

ProblemSolution
Transaction retry errorsImplement retry loops with SAVEPOINT cockroach_restart
Slow queriesRun EXPLAIN ANALYZE; check indexes; look at DB Console
Hot rangesCheck range distribution; add more columns to primary key
Node not joiningVerify --join addresses; check network connectivity
Certificate errorsRegenerate certs with cockroach cert create-* commands
Memory issuesSet --max-sql-memory and --cache flags appropriately
Replication lagCheck system.replication_stats; ensure nodes are healthy
Schema change stuckCheck SHOW JOBS; cancel with CANCEL JOB <id>