Zipkin
Zipkin is an open-source distributed tracing system for gathering timing data needed to troubleshoot latency problems.
Installation
Docker Container
# Quick start: single container, spans kept in memory only
docker run --detach --name zipkin \
  --publish 9411:9411 \
  openzipkin/zipkin:latest
# Alternative: same container, but storing spans in an existing MySQL server
docker run --detach --name zipkin \
  --env STORAGE_TYPE=mysql \
  --env MYSQL_HOST=mysql-host \
  --publish 9411:9411 \
  openzipkin/zipkin:latest
Docker Compose
version: '3'
services:
  # Single-node Elasticsearch used as the span store
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.14.0
    environment:
      - discovery.type=single-node
    ports:
      - "9200:9200"
  zipkin:
    image: openzipkin/zipkin:latest
    environment:
      - STORAGE_TYPE=elasticsearch
      - ES_HOSTS=elasticsearch
      # Accepted levels are BASIC, HEADERS and BODY; remove the variable to turn logging off
      - ES_HTTP_LOGGING=BASIC
    ports:
      - "9411:9411"
    depends_on:
      - elasticsearch
Linux Binary
# Download the latest release (saved as ./zipkin.jar) and run it
curl -sSL https://zipkin.io/quickstart.sh | bash -s
java -jar zipkin.jar
# Or download a specific version manually
# (current server releases are published under the io.zipkin group, not io.zipkin.java)
wget https://repo1.maven.org/maven2/io/zipkin/zipkin-server/2.24.0/zipkin-server-2.24.0-exec.jar
java -jar zipkin-server-2.24.0-exec.jar
Docker Compose Full Stack
version: '3'
services:
  mysql:
    image: mysql:5.7
    environment:
      MYSQL_ROOT_PASSWORD: zipkin
      MYSQL_DATABASE: zipkin
    ports:
      - "3306:3306"
    volumes:
      - mysqldata:/var/lib/mysql
      # Zipkin does not create its own MySQL tables. Save mysql.sql from the
      # openzipkin/zipkin repository (zipkin-storage/mysql-v1/src/main/resources)
      # next to this file; the image loads it into MYSQL_DATABASE on first start.
      - ./mysql.sql:/docker-entrypoint-initdb.d/zipkin.sql:ro
  zipkin:
    image: openzipkin/zipkin:latest
    environment:
      - STORAGE_TYPE=mysql
      - MYSQL_HOST=mysql
      - MYSQL_USER=root
      - MYSQL_PASS=zipkin
      - MYSQL_DB=zipkin
    ports:
      - "9411:9411"
    depends_on:
      - mysql
volumes:
  mysqldata:
Storage Backends
In-Memory (Development)
# Default in-memory storage: no storage options are passed, so nothing is persisted across restarts
java -jar zipkin-server.jar
# Configure memory limits: -Xmx512m caps the JVM heap at 512 MiB
java -Xmx512m -jar zipkin-server.jar
Elasticsearch
# Elasticsearch 7.x
# The server reads its storage settings from environment variables;
# --storage.* / --elasticsearch.* flags are not recognized and would be ignored.
STORAGE_TYPE=elasticsearch \
ES_HOSTS=http://localhost:9200 \
java -jar zipkin-server.jar
# With authentication (credentials are passed separately rather than embedded in the URL)
STORAGE_TYPE=elasticsearch \
ES_HOSTS=https://localhost:9200 \
ES_USERNAME=elastic \
ES_PASSWORD=mypassword \
ES_INDEX=zipkin-traces \
ES_PIPELINE=zipkin-spans \
java -jar zipkin-server.jar
MySQL
# MySQL setup (the Zipkin tables must also be created from the project's mysql.sql schema)
mysql -u root -ppassword -e "CREATE DATABASE IF NOT EXISTS zipkin;"
# Run Zipkin with MySQL
# The server reads its storage settings from environment variables, not --mysql.* flags.
STORAGE_TYPE=mysql \
MYSQL_HOST=localhost \
MYSQL_TCP_PORT=3306 \
MYSQL_USER=root \
MYSQL_PASS=password \
MYSQL_DB=zipkin \
java -jar zipkin-server.jar
Cassandra
# Cassandra configuration
# The server reads its storage settings from environment variables, not --cassandra.* flags.
STORAGE_TYPE=cassandra3 \
CASSANDRA_CONTACT_POINTS=localhost:9042 \
CASSANDRA_KEYSPACE=zipkin3 \
CASSANDRA_LOCAL_DC=datacenter1 \
java -jar zipkin-server.jar
Instrumentation
JavaScript/Node.js
// tracer.js
// Builds the shared Tracer that every instrumented module imports.
const zipkin = require("zipkin");
const { HttpLogger } = require("zipkin-transport-http");

// Spans are batched in memory and POSTed to the collector's v2 endpoint.
const recorder = new zipkin.BatchRecorder({
  logger: new HttpLogger({
    endpoint: "http://localhost:9411/api/v2/spans",
    // HttpLogger encodes v1 JSON unless told otherwise; /api/v2/spans expects v2.
    jsonEncoder: zipkin.jsonEncoder.JSON_V2,
    headers: { "Content-Type": "application/json" }
  })
});

const tracer = new zipkin.Tracer({
  recorder,
  ctxImpl: new zipkin.ExplicitContext(),
  // The Tracer option is localServiceName; an unrecognized localServiceConfig
  // object is ignored and spans are reported under the service name "unknown".
  localServiceName: "my-service"
});

module.exports = tracer;
Express.js Integration
// app.js
const express = require("express");
const zipkinMiddleware = require("zipkin-instrumentation-express");
const tracer = require("./tracer");

const app = express();

// Add Zipkin middleware: it opens a server span per request and records the
// HTTP method, path and response status code on it.
app.use(zipkinMiddleware.expressMiddleware({
  tracer,
  serviceName: "my-express-app",
  port: 3000
}));

app.get("/api/users", (req, res) => {
  // tracer.local() runs the callback inside a child span named "get-users"
  // and finishes the span when the callback returns or throws.
  tracer.local("get-users", () => {
    try {
      // Get users logic
      const users = [];
      // Tags are recorded on the current span as string key/value pairs.
      tracer.recordBinary("users.count", String(users.length));
      res.json({ users });
    } catch (error) {
      tracer.recordBinary("error", error.message);
      res.status(500).json({ error: error.message });
    }
  });
});

app.listen(3000);
Python
# tracer.py
from py_zipkin.encoding import Encoding
from py_zipkin.transport import SimpleHTTPTransport
from py_zipkin.zipkin import zipkin_span

# SimpleHTTPTransport posts encoded spans to the Zipkin server at the given
# host and port. It is synchronous and does not retry, so it is meant for
# trying things out rather than for production traffic.
transport = SimpleHTTPTransport("localhost", 9411)

# zipkin_span starts the span on entry and reports it on exit, including when
# the body raises, so no explicit start()/stop() pairing is needed.
with zipkin_span(
    service_name="my-service",
    span_name="my-span",
    transport_handler=transport,
    port=8000,
    sample_rate=100,  # percentage of traces to record, 0.0-100.0
    encoding=Encoding.V2_JSON,
):
    # Your code here
    pass
Java
// TracerFactory.java
// Wires a Brave Tracing instance to a Zipkin collector over HTTP.
import brave.Tracing;
import brave.http.HttpTracing;
import zipkin2.reporter.AsyncReporter;
import zipkin2.reporter.okhttp3.OkHttpSender;
// Reporter whose sender targets the collector's v2 spans endpoint
AsyncReporter<zipkin2.Span> reporter = AsyncReporter.create(
OkHttpSender.create("http://localhost:9411/api/v2/spans")
);
// Tracing component for the service "my-service", handing spans to the reporter above
Tracing tracing = Tracing.newBuilder()
.localServiceName("my-service")
.spanReporter(reporter)
.build();
// HTTP-level tracing built on top of the Tracing component
HttpTracing httpTracing = HttpTracing.create(tracing);
API Endpoints
Services and Operations
# Get all services
curl http://localhost:9411/api/v2/services
# Get span (operation) names for a service
curl "http://localhost:9411/api/v2/spans?serviceName=my-service"
# Get remote services called by a service (the v2 API has no per-service stats endpoint)
curl "http://localhost:9411/api/v2/remoteServices?serviceName=my-service"
Trace Search and Retrieval
# Search traces
curl "http://localhost:9411/api/v2/traces?serviceName=my-service"
# Search with time range: endTs is epoch milliseconds, lookback is the window before it in milliseconds
curl "http://localhost:9411/api/v2/traces?serviceName=my-service&endTs=1609459200000&lookback=3600000&limit=50"
# Search by span tag (annotationQuery takes key=value, URL-encoded)
curl "http://localhost:9411/api/v2/traces?serviceName=my-service&annotationQuery=http.status_code%3D500"
# Get single trace (replace trace-id with the hex trace ID; note the singular /trace/)
curl http://localhost:9411/api/v2/trace/trace-id
# Get trace dependencies
curl "http://localhost:9411/api/v2/dependencies?endTs=1609459200000"
Context Propagation
B3 Header Propagation
// Node.js - Inject B3 headers
const trace_id = "trace-id";
const span_id = "span-id";
const parent_span_id = "parent-span-id";
// Header values are strings: "1" means sampled, "0" means not sampled.
// Named sampled_flag so it does not clash with the extracted `sampled` below.
const sampled_flag = "1";
const b3Headers = {
  "X-B3-TraceId": trace_id,
  "X-B3-SpanId": span_id,
  "X-B3-ParentSpanId": parent_span_id,
  "X-B3-Sampled": sampled_flag
};
// Add to outgoing request
Object.assign(outgoingRequest.headers, b3Headers);

// Extract B3 headers from incoming request
// (Node.js exposes incoming header names in lower case)
const traceId = req.headers["x-b3-traceid"];
const spanId = req.headers["x-b3-spanid"];
const parentSpanId = req.headers["x-b3-parentspanid"];
const sampled = req.headers["x-b3-sampled"];
Configuration
Zipkin Server Configuration
# zipkin.yml
# HTTP listener settings
server:
  port: 9411
  servlet:
    context-path: /
# Management endpoints exposed over HTTP
management:
  endpoints:
    web:
      exposure:
        include: health,info
zipkin:
  # Span storage backend
  storage:
    type: elasticsearch
    elasticsearch:
      hosts: localhost:9200
      index: zipkin
      pipeline: zipkin-spans
  # Web UI settings
  ui:
    enabled: true
    defaultLookback: 900000
Environment Variables
# Start Zipkin with environment variables
# Each variable is exported so the java process started on the last line inherits it.
export STORAGE_TYPE=elasticsearch
export ES_HOSTS=elasticsearch:9200
export ES_INDEX_REPLICAS=1
export ES_PIPELINE=zipkin-spans
export UI_ENABLED=true
java -jar zipkin-server.jar
Web UI Usage
Trace Search
- Open http://localhost:9411
- Select service from dropdown
- Set time range (1 hour, 24 hours, custom)
- Filter by operation or tags
- Click “Find Traces”
Trace Details
- Click on a trace
- View timeline of spans
- Inspect span details (tags, logs, timing)
- Export as JSON if needed
Troubleshooting
Verify Server Running
# Health check (Zipkin serves it at /health, not under /actuator)
curl http://localhost:9411/health
# Get UI configuration
curl http://localhost:9411/zipkin/config.json
# Check dependency graph (endTs is required: epoch milliseconds, here "now")
curl "http://localhost:9411/api/v2/dependencies?endTs=$(date +%s)000"
Debug Span Issues
# Check if spans are being received (a reporting service shows up in this list)
curl "http://localhost:9411/api/v2/services"
# View recent traces
curl "http://localhost:9411/api/v2/traces?limit=10"
# Check specific service: list the span names recorded for it
curl "http://localhost:9411/api/v2/spans?serviceName=my-service"
Performance Optimization
# Elasticsearch optimization
# Increase shard count for large deployments.
# The shard count is fixed when an index is created, so set it for the indices
# Zipkin will create by exporting these before starting the server:
export ES_INDEX_SHARDS=5
export ES_INDEX_REPLICAS=1
# Replica count and refresh interval are dynamic and can be changed on existing indices:
PUT /zipkin*/_settings
{
  "index": {
    "number_of_replicas": 1,
    "refresh_interval": "30s"
  }
}
# MySQL optimization
# Adds a secondary index on trace_id to the spans and annotations tables.
# NOTE(review): the stock Zipkin schema may already index trace_id - check with
# SHOW INDEX FROM zipkin_spans before adding a duplicate.
ALTER TABLE zipkin_spans ADD INDEX `idx_trace_id` (trace_id);
ALTER TABLE zipkin_annotations ADD INDEX `idx_trace_id` (trace_id);
Best Practices
- Set appropriate sampling rates for high-traffic services
- Use consistent service naming across all instrumentation
- Include meaningful span tags for filtering and analysis
- Configure appropriate data retention based on storage capacity
- Monitor Zipkin server health and storage capacity
- Use dependency graphs to identify service communication patterns
- Set up alerts for error rates and latency increases
- Back up the storage backend regularly (especially MySQL)
- Test instrumentation in development before production deployment
- Document custom span tags and their purposes
Resources
Last updated: 2025-03-30