Zipkin

Zipkin is an open-source distributed tracing system for gathering timing data needed to troubleshoot latency problems.

Installation

Docker Container

# Quick start (in-memory storage)
docker run -d --name zipkin \
  -p 9411:9411 \
  openzipkin/zipkin:latest

# With MySQL backend
docker run -d --name zipkin \
  -e STORAGE_TYPE=mysql \
  -e MYSQL_HOST=mysql-host \
  -p 9411:9411 \
  openzipkin/zipkin:latest

Docker Compose

version: '3'
services:
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.14.0
    environment:
      # Run as a standalone single-node cluster
      - discovery.type=single-node
    ports:
      - "9200:9200"

  zipkin:
    image: openzipkin/zipkin:latest
    environment:
      - STORAGE_TYPE=elasticsearch
      # Reach Elasticsearch through its Compose service name
      - ES_HOSTS=elasticsearch:9200
      # Accepted values are BASIC, HEADERS or BODY
      - ES_HTTP_LOGGING=BASIC
    ports:
      - "9411:9411"
    depends_on:
      - elasticsearch

Linux Binary

# Download the latest release as zipkin.jar, then run it
curl -sSL https://zipkin.io/quickstart.sh | bash -s
java -jar zipkin.jar

# Or manually download a pinned version (the server is published under the io.zipkin group)
wget https://repo1.maven.org/maven2/io/zipkin/zipkin-server/2.24.0/zipkin-server-2.24.0-exec.jar
java -jar zipkin-server-2.24.0-exec.jar

Docker Compose Full Stack

version: '3'
services:
  mysql:
    image: mysql:5.7
    environment:
      MYSQL_ROOT_PASSWORD: zipkin
      MYSQL_DATABASE: zipkin
    ports:
      - "3306:3306"
    volumes:
      - mysqldata:/var/lib/mysql

  zipkin:
    image: openzipkin/zipkin:latest
    environment:
      - STORAGE_TYPE=mysql
      - MYSQL_HOST=mysql
      - MYSQL_USER=root
      - MYSQL_PASS=zipkin
      - MYSQL_DB=zipkin
    ports:
      - "9411:9411"
    depends_on:
      - mysql

volumes:
  mysqldata:

Storage Backends

In-Memory (Development)

# Default in-memory storage
java -jar zipkin-server.jar

# Configure memory limits
java -Xmx512m -jar zipkin-server.jar

Elasticsearch

# Elasticsearch 7.x
# Storage is selected through environment variables, not --storage.* flags
STORAGE_TYPE=elasticsearch \
ES_HOSTS=http://localhost:9200 \
java -jar zipkin-server.jar

# With authentication (credentials go in ES_USERNAME/ES_PASSWORD, not in the URL)
STORAGE_TYPE=elasticsearch \
ES_HOSTS=https://localhost:9200 \
ES_USERNAME=elastic \
ES_PASSWORD=mypassword \
ES_INDEX=zipkin-traces \
ES_PIPELINE=zipkin-spans \
java -jar zipkin-server.jar

MySQL

# MySQL setup (-p prompts for the password instead of exposing it on the command line)
mysql -u root -p -e "CREATE DATABASE IF NOT EXISTS zipkin;"
# Zipkin does not create its own tables: load mysql.sql from the Zipkin
# repository (zipkin-storage/mysql-v1/src/main/resources/mysql.sql)
mysql -u root -p zipkin < mysql.sql

# Run Zipkin with MySQL
# Storage is selected through environment variables, not --mysql.* flags
STORAGE_TYPE=mysql \
MYSQL_HOST=localhost \
MYSQL_TCP_PORT=3306 \
MYSQL_USER=root \
MYSQL_PASS=password \
MYSQL_DB=zipkin \
java -jar zipkin-server.jar

Cassandra

# Cassandra configuration
# Storage is selected through environment variables, not --cassandra.* flags
STORAGE_TYPE=cassandra3 \
CASSANDRA_CONTACT_POINTS=localhost:9042 \
CASSANDRA_KEYSPACE=zipkin3 \
CASSANDRA_LOCAL_DC=datacenter1 \
java -jar zipkin-server.jar

Instrumentation

JavaScript/Node.js

// tracer.js
// Builds the process-wide Zipkin tracer and exports it for the rest of the app.
const zipkin = require("zipkin");
const { HttpLogger } = require("zipkin-transport-http");

// Name under which this process's spans are reported
const localServiceName = "my-service";

// Batches finished spans and ships them to the collector over HTTP
const recorder = new zipkin.BatchRecorder({
  logger: new HttpLogger({
    endpoint: "http://localhost:9411/api/v2/spans",
    // HttpLogger encodes as JSON v1 by default; the /api/v2 endpoint expects v2
    jsonEncoder: zipkin.jsonEncoder.JSON_V2,
    headers: { "Content-Type": "application/json" }
  })
});

const tracer = new zipkin.Tracer({
  recorder,
  ctxImpl: new zipkin.ExplicitContext(),
  // Tracer reads the service name from `localServiceName`
  localServiceName
});

module.exports = tracer;

Express.js Integration

// app.js
const express = require("express");
const zipkinMiddleware = require("zipkin-instrumentation-express");
const tracer = require("./tracer");

const app = express();

// Add Zipkin middleware: opens a server span for every request and
// records the response status code on it when the response finishes
app.use(zipkinMiddleware.expressMiddleware({
  tracer,
  serviceName: "my-express-app",
  port: 3000
}));

app.get("/api/users", (req, res) => {
  // tracer.local() runs the callback inside a child span named "get-users"
  // and finishes that span when the callback returns
  tracer.local("get-users", () => {
    try {
      // Get users logic
      res.json({ users: [] });
    } catch (error) {
      // Tag the current span so the failure is searchable in the UI
      tracer.recordBinary("error", error.message);
      res.status(500).json({ error: error.message });
    }
  });
});

app.listen(3000);

Python

# tracer.py
import urllib.request

from py_zipkin.encoding import Encoding
from py_zipkin.zipkin import zipkin_span

ZIPKIN_API_URL = "http://localhost:9411/api/v2/spans"


def http_transport(encoded_span):
    """POST one encoded batch of spans to the Zipkin collector."""
    # urllib needs bytes; accept either str or bytes from the encoder
    body = encoded_span.encode("utf-8") if isinstance(encoded_span, str) else encoded_span
    request = urllib.request.Request(
        ZIPKIN_API_URL,
        data=body,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=5):
        pass


# The context manager starts the span on entry and reports it on exit,
# including when the body raises
with zipkin_span(
    service_name="my-service",
    span_name="my-span",
    transport_handler=http_transport,
    port=8000,
    sample_rate=100,  # percentage of traces to record (0.0-100.0)
    encoding=Encoding.V2_JSON,  # matches the /api/v2/spans endpoint
):
    # Your code here
    pass

Java

// TracerFactory.java
import brave.Tracing;
import brave.http.HttpTracing;
import zipkin2.reporter.AsyncReporter;
import zipkin2.reporter.okhttp3.OkHttpSender;

AsyncReporter<zipkin2.Span> reporter = AsyncReporter.create(
    OkHttpSender.create("http://localhost:9411/api/v2/spans")
);

Tracing tracing = Tracing.newBuilder()
    .localServiceName("my-service")
    .spanReporter(reporter)
    .build();

HttpTracing httpTracing = HttpTracing.create(tracing);

API Endpoints

Services and Operations

# Get all services
curl http://localhost:9411/api/v2/services

# Get operations (span names) for service
curl "http://localhost:9411/api/v2/spans?serviceName=my-service"

# Get remote services called by service
curl "http://localhost:9411/api/v2/remoteServices?serviceName=my-service"

Trace Search and Retrieval

# Search traces
curl "http://localhost:9411/api/v2/traces?serviceName=my-service"

# Search with time range: the window is [endTs - lookback, endTs], both in milliseconds
curl "http://localhost:9411/api/v2/traces?serviceName=my-service&endTs=1609459200000&lookback=3600000&limit=50"

# Search by span tag (annotationQuery takes URL-encoded key=value)
curl "http://localhost:9411/api/v2/traces?serviceName=my-service&annotationQuery=http.status_code%3D500"

# Get single trace (trace ID is 16 or 32 lower-hex characters)
curl http://localhost:9411/api/v2/trace/463ac35c9f6413ad

# Get trace dependencies (quoted so the shell does not treat "?" as a glob)
curl "http://localhost:9411/api/v2/dependencies?endTs=1609459200000"

Context Propagation

B3 Header Propagation

// Node.js - B3 header propagation helpers

/**
 * Inject B3 trace context into an outgoing request's header object.
 *
 * @param {object} headers - Header map of the outgoing request; mutated in place.
 * @param {object} context - Trace context to propagate.
 * @param {string} context.traceId - Trace ID.
 * @param {string} context.spanId - ID of the current span.
 * @param {string} [context.parentSpanId] - Parent span ID; omitted for root spans.
 * @param {string|number} [context.sampled] - Sampling decision (1 or 0); omitted when undecided.
 * @returns {object} The same `headers` object, for chaining.
 */
function injectB3Headers(headers, { traceId, spanId, parentSpanId, sampled }) {
  headers["X-B3-TraceId"] = traceId;
  headers["X-B3-SpanId"] = spanId;
  if (parentSpanId != null) {
    headers["X-B3-ParentSpanId"] = parentSpanId;
  }
  if (sampled != null) {
    // Header values must be strings, so 1 is sent as "1"
    headers["X-B3-Sampled"] = String(sampled);
  }
  return headers;
}

/**
 * Extract B3 trace context from an incoming request's headers.
 *
 * @param {object} headers - Incoming header map with lower-cased names (as in Node's `req.headers`).
 * @returns {{traceId: (string|undefined), spanId: (string|undefined),
 *   parentSpanId: (string|undefined), sampled: (string|undefined)}}
 *   Raw header values; a field is `undefined` when its header is absent.
 */
function extractB3Headers(headers) {
  return {
    traceId: headers["x-b3-traceid"],
    spanId: headers["x-b3-spanid"],
    parentSpanId: headers["x-b3-parentspanid"],
    sampled: headers["x-b3-sampled"]
  };
}

// Add to outgoing request:
//   injectB3Headers(outgoingRequest.headers, {
//     traceId: "trace-id", spanId: "span-id", parentSpanId: "parent-span-id", sampled: 1
//   });
//
// Extract B3 headers from incoming request:
//   const { traceId, spanId, parentSpanId, sampled } = extractB3Headers(req.headers);

Configuration

Zipkin Server Configuration

# zipkin.yml
server:
  port: 9411
  servlet:
    context-path: /

management:
  endpoints:
    web:
      exposure:
        include: health,info

zipkin:
  storage:
    type: elasticsearch
    # Backend-specific settings are nested under zipkin.storage.<type>
    elasticsearch:
      hosts: localhost:9200
      index: zipkin
      pipeline: zipkin-spans
  ui:
    enabled: true
    # Default search window in milliseconds (15 minutes)
    defaultLookback: 900000

Environment Variables

# Start Zipkin with environment variables
export STORAGE_TYPE=elasticsearch
export ES_HOSTS=elasticsearch:9200
export ES_INDEX_REPLICAS=1
export ES_PIPELINE=zipkin-spans
export UI_ENABLED=true

java -jar zipkin-server.jar

Web UI Usage

Trace Search

Open http://localhost:9411
Select service from dropdown
Set time range (1 hour, 24 hours, custom)
Filter by operation or tags
Click “Find Traces”

Trace Details

Click on a trace
View timeline of spans
Inspect span details (tags, logs, timing)
Export as JSON if needed

Troubleshooting

Verify Server Running

# Health check
curl http://localhost:9411/health

# Get UI configuration
curl http://localhost:9411/zipkin/config.json

# Check dependency graph (endTs is required: epoch milliseconds, here "now")
curl "http://localhost:9411/api/v2/dependencies?endTs=$(date +%s)000"

Debug Span Issues

# Check if spans are being received
curl "http://localhost:9411/api/v2/services"

# View recent traces
curl "http://localhost:9411/api/v2/traces?limit=10"

# Check specific service (lists the span names it has reported)
curl "http://localhost:9411/api/v2/spans?serviceName=my-service"

Performance Optimization

# Elasticsearch optimization
# Shard and replica counts come from the index template Zipkin installs, so
# set them when starting the server; they apply to newly created indices
# (the shard count of an existing index cannot be changed)
export ES_INDEX_SHARDS=5
export ES_INDEX_REPLICAS=1

# Dynamic settings can be updated on the existing indices
PUT /zipkin*/_settings
{
  "index": {
    "refresh_interval": "30s"
  }
}

# MySQL optimization
ALTER TABLE zipkin_spans ADD INDEX `idx_trace_id` (trace_id);
ALTER TABLE zipkin_annotations ADD INDEX `idx_trace_id` (trace_id);

Best Practices

Set appropriate sampling rates for high-traffic services
Use consistent service naming across all instrumentation
Include meaningful span tags for filtering and analysis
Configure appropriate data retention based on storage capacity
Monitor Zipkin server health and storage capacity
Use dependency graphs to identify service communication patterns
Set up alerts for error rates and latency increases
Regularly back up the storage backend (especially MySQL)
Test instrumentation in development before production deployment
Document custom span tags and their purposes

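Resources

Official site and documentation: https://zipkin.io/
Source code and server configuration reference: https://github.com/openzipkin/zipkin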

Last updated: 2025-03-30