Helicone
Heliconeは、アプリケーションとLLMプロバイダーの間でプロキシレイヤーとして機能する、オープンソースのLLMオブザーバビリティプラットフォームです。1行の変更で統合でき、すべてのリクエストをキャプチャして、ログ記録、コスト追跡、レート制限、キャッシュ、ユーザー分析を提供します。
インストール
# Install Helicone SDK (optional - proxy works without SDK)
pip install helicone
# or
npm install @helicone/helicone
# Get your API key from helicone.ai
export HELICONE_API_KEY="sk-helicone-your-key"
Proxy Setup
OpenAI via Proxy
from openai import OpenAI
# Point OpenAI to Helicone proxy - one line change
client = OpenAI(
base_url="https://oai.helicone.ai/v1",
default_headers={
"Helicone-Auth": f"Bearer {HELICONE_API_KEY}",
}
)
# All calls are now logged through Helicone
response = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "Hello"}]
)
Anthropic via Proxy
from anthropic import Anthropic
client = Anthropic(
base_url="https://anthropic.helicone.ai",
default_headers={
"Helicone-Auth": f"Bearer {HELICONE_API_KEY}",
}
)
response = client.messages.create(
model="claude-sonnet-4-20250514",
max_tokens=1024,
messages=[{"role": "user", "content": "Hello"}]
)
Node.js / TypeScript
import OpenAI from "openai";
const client = new OpenAI({
baseURL: "https://oai.helicone.ai/v1",
defaultHeaders: {
"Helicone-Auth": `Bearer ${process.env.HELICONE_API_KEY}`,
},
});
const response = await client.chat.completions.create({
model: "gpt-4o",
messages: [{ role: "user", content: "Hello" }],
});
Azure OpenAI via Proxy
from openai import AzureOpenAI
client = AzureOpenAI(
azure_endpoint="https://oai.helicone.ai",
default_headers={
"Helicone-Auth": f"Bearer {HELICONE_API_KEY}",
"Helicone-OpenAI-Api-Base": "https://your-resource.openai.azure.com",
},
api_key="your-azure-key",
api_version="2024-02-15-preview",
)
Request Logging
Custom Properties
# Tag requests with custom properties for filtering
response = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "Summarize this article"}],
extra_headers={
"Helicone-Property-Environment": "production",
"Helicone-Property-Feature": "summarization",
"Helicone-Property-Version": "2.1",
}
)
User Tracking
# Track per-user usage and costs
response = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "Hello"}],
extra_headers={
"Helicone-User-Id": "user-123",
}
)
Session Tracking
# Group related requests into sessions
response = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "Start of conversation"}],
extra_headers={
"Helicone-Session-Id": "session-abc",
"Helicone-Session-Name": "Customer Support Chat",
"Helicone-Session-Path": "/chat/turn-1",
}
)
Cost Tracking
Viewing Costs via API
# Query request data and costs
curl -s "https://api.helicone.ai/v1/request/query" \
-H "Authorization: Bearer $HELICONE_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"filter": {
"properties": {
"environment": {"equals": "production"}
},
"request_created_at": {
"gte": "2025-01-01T00:00:00Z"
}
},
"limit": 100
}'
Cost Alerts
# Costs are tracked automatically per model, user, and property
# Configure alerts in the Helicone dashboard:
# - Daily/weekly/monthly cost thresholds
# - Per-user spending limits
# - Anomaly detection on cost spikes
Rate Limiting
# Apply rate limits via headers
response = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "Hello"}],
extra_headers={
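# Rate limit: 100 requests per minute per user
# NOTE(review): Helicone documents the policy format as
# [quota];w=[window seconds];u=[unit: request|cents];s=[segment: user|property name].
# Per-user limiting is normally expressed with s=user - verify the value below.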
"Helicone-RateLimit-Policy": "100;w=60;u=user",
"Helicone-User-Id": "user-123",
}
)
Caching
# Enable LLM response caching to reduce costs
response = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "What is 2+2?"}],
extra_headers={
"Helicone-Cache-Enabled": "true",
}
)
# Set cache bucket size (store several cached responses for the same request)
response = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "What is 2+2?"}],
extra_headers={
"Helicone-Cache-Enabled": "true",
"Helicone-Cache-Bucket-Max-Size": "3", # Cache up to 3 responses
"Helicone-Cache-Seed": "random-seed", # Control cache variation
}
)
Retries
# Automatic retry with exponential backoff
response = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "Hello"}],
extra_headers={
"Helicone-Retry-Enabled": "true",
"Helicone-Retry-Num": "3", # Max retries
"Helicone-Retry-Factor": "2", # Backoff multiplier
"Helicone-Retry-Min-Timeout": "1000", # Min wait ms
"Helicone-Retry-Max-Timeout": "10000", # Max wait ms
}
)
Prompt Management
# Track prompt versions
response = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "Hello"}],
extra_headers={
"Helicone-Prompt-Id": "summarizer-v2",
"Helicone-Prompt-Version": "2.0",
}
)
Feedback and Scoring
# Add feedback to a logged request
curl -X POST "https://api.helicone.ai/v1/request/{request-id}/feedback" \
-H "Authorization: Bearer $HELICONE_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"rating": true,
"score": 0.95
}'
Gateway Fallback
# Configure model fallback through Helicone gateway
response = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "Hello"}],
extra_headers={
"Helicone-Fallbacks": '["gpt-4o-mini", "gpt-3.5-turbo"]',
}
)