Commandes LangSmith
LangSmith est la plateforme d’observabilité construite par LangChain pour déboguer, tester, évaluer et surveiller les applications LLM en production. Elle fournit le traçage, l’évaluation sur des jeux de données, l’annotation humaine et la surveillance des performances.
Installation
# Install the LangSmith SDK
pip install langsmith
# Tracing configuration, read from the environment
export LANGCHAIN_TRACING_V2=true
export LANGCHAIN_API_KEY="ls-your-api-key"
export LANGCHAIN_PROJECT="my-project"
# Optional: API endpoint override. The URL below is the hosted API also used by
# the curl examples in this document; replace it with your own URL when self-hosting.
export LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
Traçage
Traçage automatique avec LangChain
# With the tracing environment variables set, LangChain code is traced
# without any extra instrumentation.
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Two-message chat prompt; "{input}" is filled in at invoke time.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant."),
        ("user", "{input}"),
    ]
)
llm = ChatOpenAI(model="gpt-4o")

# Pipe the prompt into the model to build the chain.
chain = prompt | llm

# This invocation is what shows up as a trace in LangSmith.
result = chain.invoke({"input": "Hello"})
Traçage manuel avec @traceable
from langsmith import traceable
@traceable(name="my-function", run_type="chain")
def my_pipeline(query: str) -> str:
    """Answer *query* by retrieving documents and generating a response.

    Args:
        query: The user question, passed to both the retriever and the
            generator.

    Returns:
        The string returned by ``generate_response``.
    """
    # All nested calls are captured as child spans
    docs = retrieve_docs(query)
    return generate_response(query, docs)
@traceable(run_type="retriever")
def retrieve_docs(query: str) -> list:
    """Return the documents for *query*.

    Stub implementation: *query* is ignored and the same two document
    names are always returned.
    """
    return ["doc1", "doc2"]
@traceable(run_type="llm")
def generate_response(query: str, docs: list) -> str:
    """Return the answer to *query* given the retrieved *docs*.

    Stub implementation: both arguments are ignored and a fixed string is
    returned.
    """
    return "Generated answer"
Encapsuler OpenAI directement
from openai import OpenAI
from langsmith.wrappers import wrap_openai

# Wrap the OpenAI client once; calls made through it are then traced.
client = wrap_openai(OpenAI())

# A regular chat completion request, issued through the wrapped client.
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "user", "content": "Hello"},
    ],
)
Jeux de données
Création de jeux de données
from langsmith import Client

client = Client()

# Create the (initially empty) dataset
dataset = client.create_dataset(
    dataset_name="qa-examples",
    description="Question-answer evaluation pairs",
)

# Question/answer pairs to store as examples; inputs and outputs are
# derived from the same list so they always line up.
qa_pairs = [
    ("What is LangSmith?", "An observability platform"),
    ("How do traces work?", "Automatic span capture"),
]
client.create_examples(
    inputs=[{"question": question} for question, _ in qa_pairs],
    outputs=[{"answer": answer} for _, answer in qa_pairs],
    dataset_name="qa-examples",
)
Importation depuis un fichier CSV
# Upload examples from a CSV file; the "question" column becomes the example
# inputs and the "answer" column the reference outputs.
client.upload_csv(
    csv_file="test_data.csv",
    input_keys=["question"],
    output_keys=["answer"],
    name="qa-from-csv",  # upload_csv takes the new dataset's name as `name`
)
Évaluations
Exécution d’évaluations
from langsmith.evaluation import evaluate
# Define the target function to evaluate
def my_app(inputs: dict) -> dict:
    """Produce the application output for one dataset example.

    Args:
        inputs: Example inputs; must contain a "question" key.

    Returns:
        A dict with a single "output" key holding the answer text.
    """
    return {"output": f"Answer to: {inputs['question']}"}
# Define a custom evaluator
def correctness(run, example) -> dict:
    """Score 1.0 when the reference answer appears in the prediction.

    The comparison is a case-insensitive substring match.

    Args:
        run: Object whose ``outputs`` is a dict with an "output" key, or
            ``None``/empty when the target produced nothing.
        example: Object whose ``outputs`` dict holds the reference under
            the "answer" key.

    Returns:
        ``{"key": "correctness", "score": <1.0 or 0.0>}``.
    """
    # A run without outputs (e.g. the target raised) has no prediction to
    # compare, so it is scored as incorrect instead of crashing the evaluator.
    prediction = (run.outputs or {}).get("output")
    if prediction is None:
        return {"key": "correctness", "score": 0.0}

    reference = example.outputs["answer"]
    score = 1.0 if reference.lower() in prediction.lower() else 0.0
    return {"key": "correctness", "score": score}
# Evaluate my_app on the "qa-examples" dataset with the custom evaluator;
# the experiment is created with the "baseline-v1" prefix.
results = evaluate(
    my_app,
    data="qa-examples",
    evaluators=[correctness],
    experiment_prefix="baseline-v1",
    max_concurrency=4,
)
Évaluateurs intégrés
from langsmith.evaluation import LangChainStringEvaluator

# Instantiate the two prebuilt evaluators by name
qa_evaluator, cot_evaluator = (
    LangChainStringEvaluator(evaluator_name)
    for evaluator_name in ("qa", "cot_qa")
)

# Evaluate the same app and dataset with both prebuilt evaluators
results = evaluate(
    my_app,
    data="qa-examples",
    evaluators=[qa_evaluator, cot_evaluator],
    experiment_prefix="with-builtin-evals",
)
Retour d’information et annotation
Retour d’information programmatique
# Placeholder: replace with the UUID of the run receiving feedback
target_run_id = "run-uuid-here"

# Attach a scored rating with a free-text comment to the run
client.create_feedback(
    run_id=target_run_id,
    key="user-rating",
    score=1.0,
    comment="Correct and helpful response",
)

# Attach a correction carrying the output the run should have produced
corrected_output = {"output": "The correct answer is..."}
client.create_feedback(
    run_id=target_run_id,
    key="correction",
    correction=corrected_output,
)
Files d’attente d’annotation
# Create the queue that human reviewers will work through
queue = client.create_annotation_queue(
    name="review-queue",
    description="Runs needing human review",
)

# Placeholder run ids: replace with the runs to be reviewed
runs_to_review = ["run-id-1", "run-id-2"]
client.add_runs_to_annotation_queue(
    queue_id=queue.id,
    run_ids=runs_to_review,
)
Hub de prompts
from langchain_core.prompts import ChatPromptTemplate
from langsmith import Client

# Prompts are pulled and pushed through the LangSmith client
client = Client()

# Pull a prompt from the hub
prompt = client.pull_prompt("my-org/my-prompt")

# Push a prompt to the hub
my_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a {role}."),
        ("user", "{input}"),
    ]
)
# Pushing an object to an existing prompt records it as a new commit
client.push_prompt("my-org/my-prompt", object=my_prompt)
Utilisation de l’API
# List projects (response pretty-printed with Python's json.tool)
curl --silent --header "x-api-key: $LANGCHAIN_API_KEY" \
  https://api.smith.langchain.com/api/v1/sessions | python3 -m json.tool
# Get up to 10 runs for the "my-project" project
curl --silent --header "x-api-key: $LANGCHAIN_API_KEY" \
  "https://api.smith.langchain.com/api/v1/runs?session_name=my-project&limit=10"
# List datasets
curl --silent --header "x-api-key: $LANGCHAIN_API_KEY" \
  https://api.smith.langchain.com/api/v1/datasets
Filtrage et interrogation des exécutions
from datetime import datetime, timedelta, timezone

# Query runs with filters: errored runs whose latency is greater than 5
runs = client.list_runs(
    project_name="my-project",
    filter='and(eq(status, "error"), gt(latency, 5))',
    limit=50,
)

# Filter by time range: LLM runs started within the last 24 hours.
# A timezone-aware UTC timestamp keeps the cutoff independent of the
# machine's local timezone.
runs = client.list_runs(
    project_name="my-project",
    start_time=datetime.now(timezone.utc) - timedelta(hours=24),
    run_type="llm",
)

# Get run statistics
for run in runs:
    print(f"{run.name}: {run.total_tokens} tokens, {run.latency}s")
Déploiement avec LangServe
# Serve a chain with LangServe so that the deployment can be monitored
# through tracing.
from fastapi import FastAPI
from langserve import add_routes

app = FastAPI()

# Expose the chain under /chat; traces are captured automatically for
# deployed chains.
add_routes(app, chain, path="/chat")