LangSmith
LangSmith é a plataforma de observabilidade construída pela LangChain para depuração, teste, avaliação e monitoramento de aplicações alimentadas por LLM. Fornece rastreamento, conjuntos de dados, avaliações, filas de anotação e um playground de prompts.
Instalação
# Install the LangSmith SDK
pip install langsmith
# Set the environment variables used for tracing
export LANGCHAIN_TRACING_V2=true
export LANGCHAIN_API_KEY="ls-your-api-key"
export LANGCHAIN_PROJECT="my-project"
# Optional: override the API endpoint (e.g. for a self-hosted instance).
# The URL below is the same hosted endpoint used by the curl examples in this
# guide; replace it with your own URL when self-hosting.
export LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
Rastreamento
Rastreamento automático com LangChain
# With the tracing env vars set, LangChain code is traced without extra setup
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant."),
        ("user", "{input}"),
    ]
)
llm = ChatOpenAI(model="gpt-4o")
chain = prompt | llm

# This call is automatically traced in LangSmith
result = chain.invoke({"input": "Hello"})
Rastreamento manual com @traceable
from langsmith import traceable


@traceable(name="my-function", run_type="chain")
def my_pipeline(query: str) -> str:
    """Answer *query* by retrieving documents and generating a response.

    Both helpers are decorated with ``@traceable`` as well, so the calls made
    here are captured as child spans of this run.
    """
    docs = retrieve_docs(query)
    return generate_response(query, docs)


@traceable(run_type="retriever")
def retrieve_docs(query: str) -> list:
    """Return the documents for *query* (fixed placeholder data here)."""
    return ["doc1", "doc2"]


@traceable(run_type="llm")
def generate_response(query: str, docs: list) -> str:
    """Return the answer for *query* given *docs* (fixed placeholder text)."""
    return "Generated answer"
Encapsulando o cliente OpenAI diretamente
from openai import OpenAI
from langsmith.wrappers import wrap_openai

# Wrap the OpenAI client once; all calls made through it are now traced
client = wrap_openai(OpenAI())

response = client.chat.completions.create(
    messages=[{"role": "user", "content": "Hello"}],
    model="gpt-4o",
)
Conjuntos de dados
Criando conjuntos de dados
from langsmith import Client

client = Client()

# Create the dataset that will hold the examples
dataset = client.create_dataset(
    description="Question-answer evaluation pairs",
    dataset_name="qa-examples",
)

# Add examples to the dataset (parallel lists of inputs and outputs)
client.create_examples(
    dataset_name="qa-examples",
    inputs=[
        {"question": "What is LangSmith?"},
        {"question": "How do traces work?"},
    ],
    outputs=[
        {"answer": "An observability platform"},
        {"answer": "Automatic span capture"},
    ],
)
Envio a partir de CSV
# Upload examples from a CSV file.
# input_keys / output_keys name the CSV columns used as example inputs and
# outputs. The dataset name is passed as `name`: upload_csv does not accept a
# `dataset_name` keyword.
client.upload_csv(
    csv_file="test_data.csv",
    input_keys=["question"],
    output_keys=["answer"],
    name="qa-from-csv",
)
Avaliações
Executando avaliações
from langsmith.evaluation import evaluate
# Define the target function to evaluate
def my_app(inputs: dict) -> dict:
    """Return a placeholder answer built from the ``question`` in *inputs*."""
    return {"output": f"Answer to: {inputs['question']}"}
# Define a custom evaluator
def correctness(run, example) -> dict:
    """Score 1.0 when the reference answer appears in the prediction.

    The check is a case-insensitive substring match of
    ``example.outputs["answer"]`` inside ``run.outputs["output"]``. A run with
    no outputs, or without an ``"output"`` value, scores 0.0 instead of
    raising.
    """
    # run.outputs can be None (e.g. the target raised), so fall back to "".
    prediction = (run.outputs or {}).get("output") or ""
    reference = example.outputs["answer"]
    matched = reference.lower() in prediction.lower()
    return {"key": "correctness", "score": 1.0 if matched else 0.0}
# Run the evaluation of my_app against the "qa-examples" dataset
results = evaluate(
    my_app,
    evaluators=[correctness],
    data="qa-examples",
    max_concurrency=4,
    experiment_prefix="baseline-v1",
)
Avaliadores integrados
from langsmith.evaluation import LangChainStringEvaluator

# Prebuilt evaluators, selected by name
qa_evaluator = LangChainStringEvaluator("qa")
cot_evaluator = LangChainStringEvaluator("cot_qa")

results = evaluate(
    my_app,
    evaluators=[qa_evaluator, cot_evaluator],
    experiment_prefix="with-builtin-evals",
    data="qa-examples",
)
Feedback e anotação
Feedback programático
# Attach a scored rating, with a comment, to a specific run
client.create_feedback(
    run_id="run-uuid-here",
    key="user-rating",
    comment="Correct and helpful response",
    score=1.0,
)

# Attach a correction to the same run
client.create_feedback(
    run_id="run-uuid-here",
    correction={"output": "The correct answer is..."},
    key="correction",
)
Filas de anotação
# Create an annotation queue for human review
queue = client.create_annotation_queue(
    description="Runs needing human review",
    name="review-queue",
)

# Add runs to the queue that was just created
client.add_runs_to_annotation_queue(
    run_ids=["run-id-1", "run-id-2"],
    queue_id=queue.id,
)
Prompt Hub
from langchain_core.prompts import ChatPromptTemplate
from langsmith import Client

# The langsmith package exposes prompt hub operations as Client methods
# (pull_prompt / push_prompt) rather than through an importable `hub` module.
client = Client()

# Pull a prompt from the hub
prompt = client.pull_prompt("my-org/my-prompt")

# Push a prompt to the hub
my_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a {role}."),
        ("user", "{input}"),
    ]
)
client.push_prompt("my-org/my-prompt", object=my_prompt)
Uso da API
# List projects (pretty-printed with Python's json.tool)
curl --silent --header "x-api-key: $LANGCHAIN_API_KEY" \
  https://api.smith.langchain.com/api/v1/sessions | python3 -m json.tool

# Get runs for a project
curl --silent --header "x-api-key: $LANGCHAIN_API_KEY" \
  "https://api.smith.langchain.com/api/v1/runs?session_name=my-project&limit=10"

# List datasets
curl --silent --header "x-api-key: $LANGCHAIN_API_KEY" \
  https://api.smith.langchain.com/api/v1/datasets
Filtragem e consulta de execuções
from datetime import datetime, timedelta, timezone

# Query runs with filters
runs = client.list_runs(
    project_name="my-project",
    filter='and(eq(status, "error"), gt(latency, 5))',
    limit=50,
)

# Filter by time range. Use a timezone-aware UTC timestamp so the 24-hour
# window does not shift with the local timezone of the machine.
runs = client.list_runs(
    project_name="my-project",
    start_time=datetime.now(timezone.utc) - timedelta(hours=24),
    run_type="llm",
)

# Get run statistics
for run in runs:
    print(f"{run.name}: {run.total_tokens} tokens, {run.latency}s")
Implantação com LangServe
# Monitor LangServe deployments with tracing
from fastapi import FastAPI
from langserve import add_routes

app = FastAPI()

# Serve the chain at /chat; traces are automatically captured for deployed chains
add_routes(app, chain, path="/chat")