Helicone
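Helicone هي منصة مفتوحة المصدر لمراقبة نماذج اللغة الكبيرة (LLM)، تعمل كطبقة وكيل (proxy) بين تطبيقك ومزوّدي LLM. تعترض كل طلب لتوفّر التسجيل وتتبّع التكاليف وتحديد معدل الطلبات (rate limiting) والتخزين المؤقت (caching) وتحليلات المستخدمين، وكل ذلك عبر تكامل لا يتطلب سوى تغيير سطر واحد.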
التثبيت
# Install the Helicone SDK with ONE of the package managers below.
# The SDK is optional - the proxy works without it.
pip install helicone
# or
npm install @helicone/helicone
# Get your API key from helicone.ai and export it so that tools reading
# HELICONE_API_KEY from the environment can find it
export HELICONE_API_KEY="sk-helicone-your-key"
Proxy Setup
OpenAI via Proxy
import os

from openai import OpenAI

# Read the Helicone key from the environment. Indexing os.environ raises
# KeyError right away when the variable is missing, instead of failing later
# with a NameError or sending a malformed "Bearer" header.
HELICONE_API_KEY = os.environ["HELICONE_API_KEY"]

# Point the OpenAI client at the Helicone proxy and authenticate to Helicone
# through the "Helicone-Auth" default header.
client = OpenAI(
    base_url="https://oai.helicone.ai/v1",
    default_headers={
        "Helicone-Auth": f"Bearer {HELICONE_API_KEY}",
    },
)

# Calls made with this client are sent to the proxy URL configured above
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello"}],
)
Anthropic via Proxy
import os

from anthropic import Anthropic

# Read the Helicone key from the environment. Indexing os.environ raises
# KeyError right away when the variable is missing, instead of failing later
# with a NameError or sending a malformed "Bearer" header.
HELICONE_API_KEY = os.environ["HELICONE_API_KEY"]

# Point the Anthropic client at the Helicone proxy and authenticate to
# Helicone through the "Helicone-Auth" default header.
client = Anthropic(
    base_url="https://anthropic.helicone.ai",
    default_headers={
        "Helicone-Auth": f"Bearer {HELICONE_API_KEY}",
    },
)

# Calls made with this client are sent to the proxy URL configured above
response = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    messages=[{"role": "user", "content": "Hello"}],
)
Node.js / TypeScript
import OpenAI from "openai";

// Header that authenticates the request to Helicone; the key is read from the environment.
const heliconeHeaders = {
  "Helicone-Auth": `Bearer ${process.env.HELICONE_API_KEY}`,
};

// Regular OpenAI client, configured to send its requests to the Helicone proxy URL.
const client = new OpenAI({
  baseURL: "https://oai.helicone.ai/v1",
  defaultHeaders: heliconeHeaders,
});

const response = await client.chat.completions.create({
  model: "gpt-4o",
  messages: [{ role: "user", content: "Hello" }],
});
Azure OpenAI via Proxy
import os

from openai import AzureOpenAI

# Read the Helicone key from the environment. Indexing os.environ raises
# KeyError right away when the variable is missing, instead of failing later
# with a NameError or sending a malformed "Bearer" header.
HELICONE_API_KEY = os.environ["HELICONE_API_KEY"]

# The client talks to the Helicone proxy endpoint; the real Azure resource URL
# is passed along in the "Helicone-OpenAI-Api-Base" header.
client = AzureOpenAI(
    azure_endpoint="https://oai.helicone.ai",
    default_headers={
        "Helicone-Auth": f"Bearer {HELICONE_API_KEY}",
        "Helicone-OpenAI-Api-Base": "https://your-resource.openai.azure.com",
    },
    api_key="your-azure-key",
    api_version="2024-02-15-preview",
)
Request Logging
Custom Properties
# "Helicone-Property-*" headers attach custom tags to the request for later filtering
property_headers = {
    "Helicone-Property-Environment": "production",
    "Helicone-Property-Feature": "summarization",
    "Helicone-Property-Version": "2.1",
}
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Summarize this article"}],
    extra_headers=property_headers,
)
User Tracking
# Attribute the request to a user so usage and costs can be tracked per user
user_headers = {"Helicone-User-Id": "user-123"}
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello"}],
    extra_headers=user_headers,
)
Session Tracking
# Session headers group related requests together under one session
session_headers = {
    "Helicone-Session-Id": "session-abc",
    "Helicone-Session-Name": "Customer Support Chat",
    "Helicone-Session-Path": "/chat/turn-1",
}
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Start of conversation"}],
    extra_headers=session_headers,
)
Cost Tracking
Viewing Costs via API
# Query request data and costs.
# The -d option makes curl send the JSON filter below as the POST body;
# the API key is taken from the HELICONE_API_KEY environment variable.
curl -s "https://api.helicone.ai/v1/request/query" \
-H "Authorization: Bearer $HELICONE_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"filter": {
"properties": {
"environment": {"equals": "production"}
},
"request_created_at": {
"gte": "2025-01-01T00:00:00Z"
}
},
"limit": 100
}'
Cost Alerts
# Costs are tracked automatically per model, user, and property
# Configure alerts in the Helicone dashboard:
# - Daily/weekly/monthly cost thresholds
# - Per-user spending limits
# - Anomaly detection on cost spikes
Rate Limiting
# Apply rate limits via headers.
# Policy format: "<quota>;w=<window seconds>;u=<unit>;s=<segment>".
# The per-user scope is selected with the segment field (s=user); u= is the
# unit being counted (request or cents), so "u=user" is not a valid way to
# request per-user limiting.
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello"}],
    extra_headers={
        # Rate limit: 100 requests per minute per user
        "Helicone-RateLimit-Policy": "100;w=60;s=user",
        # The user segment is keyed on this ID
        "Helicone-User-Id": "user-123",
    },
)
Caching
# Turn on response caching for this request to reduce costs
cache_headers = {"Helicone-Cache-Enabled": "true"}
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "What is 2+2?"}],
    extra_headers=cache_headers,
)
# Caching with a bucket size and a seed in addition to the enable flag
bucket_cache_headers = {
    "Helicone-Cache-Enabled": "true",
    "Helicone-Cache-Bucket-Max-Size": "3",  # Cache up to 3 responses
    "Helicone-Cache-Seed": "random-seed",  # Control cache variation
}
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "What is 2+2?"}],
    extra_headers=bucket_cache_headers,
)
Retries
# Ask the proxy to retry automatically, backing off exponentially between attempts
retry_headers = {
    "Helicone-Retry-Enabled": "true",
    "Helicone-Retry-Num": "3",  # Max retries
    "Helicone-Retry-Factor": "2",  # Backoff multiplier
    "Helicone-Retry-Min-Timeout": "1000",  # Min wait ms
    "Helicone-Retry-Max-Timeout": "10000",  # Max wait ms
}
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello"}],
    extra_headers=retry_headers,
)
Prompt Management
# Label the request with a prompt ID and version so prompt versions can be tracked
prompt_headers = {
    "Helicone-Prompt-Id": "summarizer-v2",
    "Helicone-Prompt-Version": "2.0",
}
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello"}],
    extra_headers=prompt_headers,
)
Feedback and Scoring
# Add feedback to a logged request
# NOTE: {request-id} in the URL is a placeholder - substitute the ID of the
# logged request that the rating and score below apply to.
curl -X POST "https://api.helicone.ai/v1/request/{request-id}/feedback" \
-H "Authorization: Bearer $HELICONE_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"rating": true,
"score": 0.95
}'
Gateway Fallback
# Configure model fallback through Helicone gateway
# The fallback list is sent as a JSON-encoded string in a single header
fallback_headers = {
    "Helicone-Fallbacks": '["gpt-4o-mini", "gpt-3.5-turbo"]',
}
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello"}],
    extra_headers=fallback_headers,
)