Skip to content

Jaeger

Jaeger is a distributed tracing platform for monitoring and troubleshooting complex microservices environments.

Installation

Docker Container

# All-in-one container (development)
# Published ports:
#   16686      - web UI
#   4317, 4318 - OTLP receivers (switched on by COLLECTOR_OTLP_ENABLED=true)
#   6831/udp   - agent span intake
docker run -d --name jaeger \
  -e COLLECTOR_OTLP_ENABLED=true \
  -p 16686:16686 \
  -p 4317:4317 \
  -p 4318:4318 \
  -p 6831:6831/udp \
  jaegertracing/all-in-one:latest

# Access UI at http://localhost:16686

Docker Compose Production Setup

version: '3'
services:
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.14.0
    environment:
      - discovery.type=single-node
      - xpack.security.enabled=false
    ports:
      - "9200:9200"
    volumes:
      - esdata:/usr/share/elasticsearch/data

  jaeger-collector:
    image: jaegertracing/jaeger-collector:latest
    environment:
      - SPAN_STORAGE_TYPE=elasticsearch
      - ES_SERVER_URLS=http://elasticsearch:9200
    ports:
      - "14268:14268"  # HTTP collector
      - "14250:14250"  # gRPC collector
    depends_on:
      - elasticsearch

  jaeger-query:
    image: jaegertracing/jaeger-query:latest
    environment:
      - SPAN_STORAGE_TYPE=elasticsearch
      - ES_SERVER_URLS=http://elasticsearch:9200
    ports:
      - "16686:16686"
    depends_on:
      - elasticsearch

  jaeger-agent:
    image: jaegertracing/jaeger-agent:latest
    # --log-level sets the agent's log verbosity (there is no --reporter.logLevel flag);
    # --reporter.grpc.host-port is the collector gRPC endpoint spans are forwarded to.
    command: --log-level=debug --reporter.grpc.host-port=jaeger-collector:14250
    ports:
      - "6831:6831/udp"  # jaeger.thrift, compact protocol
      - "6832:6832/udp"  # jaeger.thrift, binary protocol
      - "5778:5778"      # HTTP: sampling strategies / client config
      - "5775:5775/udp"  # zipkin.thrift, compact protocol (legacy)
    depends_on:
      - jaeger-collector

volumes:
  esdata:

Linux Binary

# Download a pinned release (bump JAEGER_VERSION to upgrade)
JAEGER_VERSION=1.48.0
wget "https://github.com/jaegertracing/jaeger/releases/download/v${JAEGER_VERSION}/jaeger-${JAEGER_VERSION}-linux-amd64.tar.gz"
tar -xzf "jaeger-${JAEGER_VERSION}-linux-amd64.tar.gz"
cd "jaeger-${JAEGER_VERSION}-linux-amd64"

# Each binary runs in the foreground: start every component in its own
# terminal (or under a service manager).
# The storage backend is selected with the SPAN_STORAGE_TYPE environment
# variable. Badger is embedded storage and cannot be shared between separate
# collector and query processes, so the split deployment uses Elasticsearch.

# Start agent (forwards spans to the collector's gRPC port)
./jaeger-agent --log-level=debug --reporter.grpc.host-port=localhost:14250

# Start collector
SPAN_STORAGE_TYPE=elasticsearch ./jaeger-collector --es.server-urls=http://localhost:9200

# Start query service (must read from the same storage the collector writes to)
SPAN_STORAGE_TYPE=elasticsearch ./jaeger-query --es.server-urls=http://localhost:9200

# Single-process alternative with embedded Badger storage
SPAN_STORAGE_TYPE=badger ./jaeger-all-in-one --badger.ephemeral=false

Agent Configuration

Basic Agent Setup

# Start agent with specific flags
jaeger-agent \
  --log-level=info \
  --reporter.grpc.host-port=localhost:14250

# Listen on a custom UDP port for compact-thrift spans (default :6831)
jaeger-agent --processor.jaeger-compact.server-host-port=:6831

# Point the agent at a remote collector (gRPC)
jaeger-agent --reporter.grpc.host-port=jaeger-collector:14250

Agent Configuration File

# jaeger-agent-config.yaml
# Load with: jaeger-agent --config-file=jaeger-agent-config.yaml
# Keys mirror the CLI flag names, with each "." becoming a nesting level.

# --processor.jaeger-compact.server-host-port
processor:
  jaeger-compact:
    server-host-port: ":6831"

# --reporter.grpc.host-port
reporter:
  grpc:
    host-port: "jaeger-collector:14250"

# --log-level
log-level: info

Collector Configuration

Span Storage Options

# The storage backend is selected with the SPAN_STORAGE_TYPE environment
# variable; backend-specific options are then passed as flags.

# In-memory (development only; single-process all-in-one)
SPAN_STORAGE_TYPE=memory jaeger-all-in-one

# Badger (embedded; single-process all-in-one only)
SPAN_STORAGE_TYPE=badger jaeger-all-in-one --badger.ephemeral=false

# Elasticsearch
SPAN_STORAGE_TYPE=elasticsearch jaeger-collector \
  --es.server-urls=http://elasticsearch:9200 \
  --es.index-prefix=jaeger

# Cassandra
SPAN_STORAGE_TYPE=cassandra jaeger-collector \
  --cassandra.keyspace=jaeger_v1_prod \
  --cassandra.servers=cassandra-1,cassandra-2

OTLP Receiver Configuration

# Enable OTLP receivers (gRPC on 4317, HTTP on 4318)
jaeger-collector \
  --collector.otlp.enabled=true \
  --collector.otlp.grpc.host-port=:4317 \
  --collector.otlp.http.host-port=:4318

Sampling Configuration

Sampling Strategies

# The sampler type is chosen by the client SDK (here via the standard
# JAEGER_SAMPLER_* environment variables), not by collector/agent flags.

# Probabilistic sampling (1% of traces)
export JAEGER_SAMPLER_TYPE=probabilistic
export JAEGER_SAMPLER_PARAM=0.01

# Rate limiting (max 10 traces per second)
export JAEGER_SAMPLER_TYPE=ratelimiting
export JAEGER_SAMPLER_PARAM=10

# Remote sampling (client polls the agent, which proxies the collector)
export JAEGER_SAMPLER_TYPE=remote
export JAEGER_SAMPLER_MANAGER_HOST_PORT=jaeger-agent:5778

# Collector side: serve the per-service strategies file to remote samplers
jaeger-collector --sampling.strategies-file=/etc/jaeger/sampling_strategies.json

Sampling Configuration File

{
  "default_strategy": {
    "type": "probabilistic",
    "param": 0.01
  },
  "service_strategies": [
    {
      "service": "critical-service",
      "type": "probabilistic",
      "param": 1.0
    },
    {
      "service": "background-job",
      "type": "probabilistic",
      "param": 0.001
    }
  ]
}

Client Integration

JavaScript/Node.js

// tracer.js
// Builds and exports one shared Jaeger tracer for the process.
const { initTracer } = require("jaeger-client");

const config = {
  serviceName: "my-service",
  // "const" sampler with param 1: every trace is sampled (development setting).
  sampler: {
    type: "const",
    param: 1
  },
  reporter: {
    // jaeger-client names these keys agentHost/agentPort (not host/port).
    agentHost: "jaeger-agent",
    // The Node.js client sends binary thrift over UDP, which the agent
    // accepts on 6832 (6831 is the compact-protocol port).
    agentPort: 6832
  }
};

const options = {
  // Route the client's own diagnostics to the console.
  logger: console
};

const tracer = initTracer(config, options);

module.exports = tracer;

Python

# tracer.py
from jaeger_client import Config
from jaeger_client.tracer import Tracer  # NOTE(review): not referenced in this snippet

# Client configuration for the service reported as "my-service".
config = Config(
    config={
        # "const" sampler with param 1 — presumably samples every trace;
        # confirm against the jaeger_client documentation.
        "sampler": {
            "type": "const",
            "param": 1
        },
        "logging": True
    },
    service_name="my-service",
    validate=True
)

# Create the tracer from the configuration above.
tracer = config.initialize_tracer()

Java

// TracerConfig.java
import io.jaegertracing.Configuration;
import io.jaegertracing.internal.JaegerTracer;

// Builds a tracer for "my-service". Each *.fromEnv() call presumably seeds the
// configuration from environment variables before the with*() calls override
// it — confirm against the jaeger-client-java documentation.
JaegerTracer tracer = Configuration.fromEnv("my-service")
    // Sampler: type "const" with param 1.
    .withSampler(Configuration.SamplerConfiguration.fromEnv()
        .withType("const")
        .withParam(1))
    // Reporter: span logging enabled, sending to an agent at localhost:6831.
    .withReporter(Configuration.ReporterConfiguration.fromEnv()
        .withLogSpans(true)
        .withSender(Configuration.SenderConfiguration.fromEnv()
            .withAgentHost("localhost")
            .withAgentPort(6831)))
    .getTracer();

Query API

API Endpoints

# Get services list
curl http://localhost:16686/api/services

# Search traces for one operation of a service
# (quote the URL: an unquoted & would background the command and drop the rest)
curl "http://localhost:16686/api/traces?service=my-service&operation=GET"

# Search traces by tag (tag=key:value; the tags= form expects a JSON map)
curl "http://localhost:16686/api/traces?service=my-service&tag=error:true"

# Get trace details (replace trace-id with a real trace ID)
curl http://localhost:16686/api/traces/trace-id

# Get service operations
curl http://localhost:16686/api/services/my-service/operations

Search Examples

# Traces with error status
curl "http://localhost:16686/api/traces?service=api&tag=error:true"

# Traces with latency threshold (duration string, e.g. 100ms, 1.5s)
curl "http://localhost:16686/api/traces?service=api&minDuration=100ms"

# Traces by span tag
curl "http://localhost:16686/api/traces?service=api&tag=http.status_code:500"

# Traces in time range (start/end are Unix timestamps in microseconds)
curl "http://localhost:16686/api/traces?service=api&start=1609459200000000&end=1609545600000000"

Instrumentation Examples

Create Spans

// Node.js
// Wraps one unit of work in a span: tags describe the operation, logs record
// events inside it, and finish() always runs so the span is closed.
const tracer = require("./tracer");

// Start the span with its initial tags.
const span = tracer.startSpan("database-query", {
  tags: {
    "db.type": "postgres",
    "db.statement": "SELECT * FROM users"
  }
});

try {
  // Database query
  span.log({ event: "query_executed", results: 42 });
  span.setTag("db.rows", 42);
} catch (error) {
  // Mark the span as failed and attach the error message.
  span.setTag("error", true);
  span.log({ event: "error", message: error.message });
} finally {
  // Close the span on both the success and the error path.
  span.finish();
}

Context Propagation

// FORMAT_HTTP_HEADERS comes from the opentracing package.
const opentracing = require("opentracing");

// Extract from incoming request
const wireCtx = tracer.extract(
  opentracing.FORMAT_HTTP_HEADERS,
  req.headers
);

// Create child span
const span = tracer.startSpan(route, { childOf: wireCtx });

// Inject into outgoing request
const carrier = {};
tracer.inject(span.context(), opentracing.FORMAT_HTTP_HEADERS, carrier);
// Merge rather than overwrite so headers already set on the request survive.
outgoingRequest.headers = { ...outgoingRequest.headers, ...carrier };

Troubleshooting

Check Agent Connection

# Verify agent is running (the sampling endpoint requires a service name)
curl "http://localhost:5778/sampling?service=my-service"

# Check collector connectivity (gRPC port 14250; 6831 is the agent's UDP
# port, which a TCP tool such as telnet cannot probe)
nc -vz localhost 14250

# View the last 100 agent log lines (--tail also covers stderr output,
# which a pipe into tail would not)
docker logs --tail 100 jaeger-agent

View Traces in UI

# Open Jaeger UI
# ("open" is the macOS launcher; on Linux use xdg-open, or paste the URL into a browser)
open http://localhost:16686

# Search for service
# 1. Select service from dropdown
# 2. Click "Find Traces"
# 3. Review span details and timeline

Performance Tuning

# Increase collector workers (default 50) and set gRPC server / queue limits
jaeger-collector \
  --collector.grpc-server.host-port=:14250 \
  --collector.grpc-server.max-connection-age=60s \
  --collector.num-workers=100 \
  --collector.queue-size=2000

# Elasticsearch performance
jaeger-collector \
  --es.bulk.size=5000000 \
  --es.bulk.flush-interval=200ms \
  --es.num-shards=5 \
  --es.num-replicas=1

Best Practices

  • Enable sampling to reduce storage costs in high-traffic systems
  • Use consistent service naming conventions across all services
  • Tag spans with meaningful business context (user_id, tenant_id)
  • Monitor span latency percentiles (p50, p95, p99)
  • Set up alerts for error rates and latency increases
  • Use distributed context propagation across all RPC boundaries
  • Implement proper span naming conventions (resource.operation)
  • Store traces in persistent backend (Elasticsearch, Cassandra)
  • Regularly clean up old traces to manage storage
  • Use sampling strategies per service based on traffic volume

Resources


Last updated: 2025-03-30