Commandes Helicone
Helicone est une plateforme d’observabilité LLM open source qui fonctionne comme une couche proxy entre votre application et les fournisseurs de LLM. Elle fournit la journalisation, l’analyse des coûts, la mise en cache, la limitation de débit et la surveillance.
Installation
# Install Helicone SDK (optional - proxy works without SDK)
pip install helicone
# or
npm install @helicone/helicone
# Get your API key from helicone.ai
export HELICONE_API_KEY="sk-helicone-your-key"
Configuration du proxy
OpenAI via proxy
from openai import OpenAI
# Point OpenAI to Helicone proxy - one line change
client = OpenAI(
base_url="https://oai.helicone.ai/v1",
default_headers={
"Helicone-Auth": f"Bearer {HELICONE_API_KEY}",
}
)
# All calls are now logged through Helicone
response = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "Hello"}]
)
Anthropic via proxy
from anthropic import Anthropic
client = Anthropic(
base_url="https://anthropic.helicone.ai",
default_headers={
"Helicone-Auth": f"Bearer {HELICONE_API_KEY}",
}
)
response = client.messages.create(
model="claude-sonnet-4-20250514",
max_tokens=1024,
messages=[{"role": "user", "content": "Hello"}]
)
Node.js / TypeScript
import OpenAI from "openai";
const client = new OpenAI({
baseURL: "https://oai.helicone.ai/v1",
defaultHeaders: {
"Helicone-Auth": `Bearer ${process.env.HELICONE_API_KEY}`,
},
});
const response = await client.chat.completions.create({
model: "gpt-4o",
messages: [{ role: "user", content: "Hello" }],
});
Azure OpenAI via proxy
from openai import AzureOpenAI
client = AzureOpenAI(
azure_endpoint="https://oai.helicone.ai",
default_headers={
"Helicone-Auth": f"Bearer {HELICONE_API_KEY}",
"Helicone-OpenAI-Api-Base": "https://your-resource.openai.azure.com",
},
api_key="your-azure-key",
api_version="2024-02-15-preview",
)
Journalisation des requêtes
Propriétés personnalisées
# Tag requests with custom properties for filtering
response = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "Summarize this article"}],
extra_headers={
"Helicone-Property-Environment": "production",
"Helicone-Property-Feature": "summarization",
"Helicone-Property-Version": "2.1",
}
)
Suivi des utilisateurs
# Track per-user usage and costs
response = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "Hello"}],
extra_headers={
"Helicone-User-Id": "user-123",
}
)
Suivi des sessions
# Group related requests into sessions
response = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "Start of conversation"}],
extra_headers={
"Helicone-Session-Id": "session-abc",
"Helicone-Session-Name": "Customer Support Chat",
"Helicone-Session-Path": "/chat/turn-1",
}
)
Suivi des coûts
Visualisation des coûts via l’API
# Query request data and costs
curl -s "https://api.helicone.ai/v1/request/query" \
-H "Authorization: Bearer $HELICONE_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"filter": {
"properties": {
"environment": {"equals": "production"}
},
"request_created_at": {
"gte": "2025-01-01T00:00:00Z"
}
},
"limit": 100
}'
Alertes de coûts
# Costs are tracked automatically per model, user, and property
# Configure alerts in the Helicone dashboard:
# - Daily/weekly/monthly cost thresholds
# - Per-user spending limits
# - Anomaly detection on cost spikes
Limitation de débit
# Apply rate limits via headers
response = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "Hello"}],
extra_headers={
# Rate limit: 100 requests per 60-second window, segmented per user (s=user)
"Helicone-RateLimit-Policy": "100;w=60;s=user",
"Helicone-User-Id": "user-123",
}
)
Mise en cache
# Enable LLM response caching to reduce costs
response = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "What is 2+2?"}],
extra_headers={
"Helicone-Cache-Enabled": "true",
}
)
# Set cache bucket size (store several distinct responses for the same request)
response = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "What is 2+2?"}],
extra_headers={
"Helicone-Cache-Enabled": "true",
"Helicone-Cache-Bucket-Max-Size": "3", # Cache up to 3 responses
"Helicone-Cache-Seed": "random-seed", # Control cache variation
}
)
Réessais
# Automatic retry with exponential backoff
response = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "Hello"}],
extra_headers={
"Helicone-Retry-Enabled": "true",
"Helicone-Retry-Num": "3", # Max retries
"Helicone-Retry-Factor": "2", # Backoff multiplier
"Helicone-Retry-Min-Timeout": "1000", # Min wait ms
"Helicone-Retry-Max-Timeout": "10000", # Max wait ms
}
)
Gestion des prompts
# Track prompt versions
response = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "Hello"}],
extra_headers={
"Helicone-Prompt-Id": "summarizer-v2",
"Helicone-Prompt-Version": "2.0",
}
)
Retour d’information et notation
# Add feedback to a logged request
curl -X POST "https://api.helicone.ai/v1/request/{request-id}/feedback" \
-H "Authorization: Bearer $HELICONE_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"rating": true,
"score": 0.95
}'
Repli via la passerelle (fallback)
# Configure model fallback through Helicone gateway
response = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "Hello"}],
extra_headers={
"Helicone-Fallbacks": '["gpt-4o-mini", "gpt-3.5-turbo"]',
}
)