LangSmith

LangSmithはLLM搭載アプリケーションのデバッグ、テスト、評価、監視のためにLangChainが構築したオブザーバビリティプラットフォームです。トレーシング、データセット、評価、アノテーションキュー、プロンプトプレイグラウンドを提供します。

インストール

# Install LangSmith SDK
pip install langsmith

# Set environment variables
export LANGCHAIN_TRACING_V2=true
export LANGCHAIN_API_KEY="ls-your-api-key"
export LANGCHAIN_PROJECT="my-project"

# Optional: override the API endpoint for a self-hosted instance (the URL below is the default cloud endpoint)
export LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"

Tracing

Automatic Tracing with LangChain

# Any LangChain code is automatically traced when env vars are set
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

llm = ChatOpenAI(model="gpt-4o")
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant."),
    ("user", "{input}")
])
chain = prompt | llm
# This call is automatically traced in LangSmith
result = chain.invoke({"input": "Hello"})

Manual Tracing with @traceable

from langsmith import traceable

@traceable(name="my-function", run_type="chain")
def my_pipeline(query: str) -> str:
    """Answer *query* by retrieving documents and generating a response.

    The nested calls made here are captured as child spans of this run.
    """
    docs = retrieve_docs(query)
    return generate_response(query, docs)

@traceable(run_type="retriever")
def retrieve_docs(query: str) -> list:
    """Return the documents for *query* (a fixed placeholder list here)."""
    placeholder_docs = ["doc1", "doc2"]
    return placeholder_docs

@traceable(run_type="llm")
def generate_response(query: str, docs: list) -> str:
    """Return the answer for *query* given *docs* (a fixed placeholder here)."""
    placeholder_answer = "Generated answer"
    return placeholder_answer

Wrapping OpenAI Directly

from langsmith.wrappers import wrap_openai
from openai import OpenAI

client = wrap_openai(OpenAI())
# All calls are now traced
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello"}]
)

Datasets

Creating Datasets

from langsmith import Client

client = Client()

# Create a dataset
dataset = client.create_dataset(
    dataset_name="qa-examples",
    description="Question-answer evaluation pairs"
)

# Add examples to the dataset
client.create_examples(
    inputs=[
        {"question": "What is LangSmith?"},
        {"question": "How do traces work?"},
    ],
    outputs=[
        {"answer": "An observability platform"},
        {"answer": "Automatic span capture"},
    ],
    dataset_name="qa-examples"
)

Uploading from CSV

# Upload examples from a CSV file. The new dataset's name is passed as
# `name`; upload_csv does not accept a `dataset_name` keyword.
client.upload_csv(
    csv_file="test_data.csv",
    input_keys=["question"],
    output_keys=["answer"],
    name="qa-from-csv"
)

評価

Running Evaluations

from langsmith.evaluation import evaluate

# Define the target function to evaluate
def my_app(inputs: dict) -> dict:
    """Target under evaluation: turn the example's question into an answer.

    Reads ``inputs["question"]`` and returns it embedded in a canned answer
    string under the ``"output"`` key.
    """
    question = inputs["question"]
    answer = f"Answer to: {question}"
    return {"output": answer}

# Define a custom evaluator
def correctness(run, example) -> dict:
    """Score whether the reference answer appears in the run's prediction.

    The comparison is a case-insensitive substring match of
    ``example.outputs["answer"]`` inside ``run.outputs["output"]``.

    Args:
        run: Object whose ``outputs`` is a dict holding the prediction under
            ``"output"``, or ``None`` (e.g. when the target raised).
        example: Object whose ``outputs`` is a dict holding the reference
            under ``"answer"``, or ``None`` for an unlabeled example.

    Returns:
        ``{"key": "correctness", "score": s}`` where ``s`` is 1.0 on a match
        and 0.0 otherwise, including when either side is missing.
    """
    prediction = (run.outputs or {}).get("output")
    reference = (example.outputs or {}).get("answer")
    if prediction is None or reference is None:
        # Nothing to compare: count it as incorrect instead of raising,
        # so one failed run does not break the evaluator.
        return {"key": "correctness", "score": 0.0}
    score = 1.0 if reference.lower() in prediction.lower() else 0.0
    return {"key": "correctness", "score": score}

# Run evaluation
results = evaluate(
    my_app,
    data="qa-examples",
    evaluators=[correctness],
    experiment_prefix="baseline-v1",
    max_concurrency=4,
)

Built-in Evaluators

from langsmith.evaluation import LangChainStringEvaluator

# Use prebuilt evaluators
qa_evaluator = LangChainStringEvaluator("qa")
cot_evaluator = LangChainStringEvaluator("cot_qa")

results = evaluate(
    my_app,
    data="qa-examples",
    evaluators=[qa_evaluator, cot_evaluator],
    experiment_prefix="with-builtin-evals",
)

Feedback and Annotation

Programmatic Feedback

# Add feedback to a specific run
client.create_feedback(
    run_id="run-uuid-here",
    key="user-rating",
    score=1.0,
    comment="Correct and helpful response"
)

# Add correction feedback
client.create_feedback(
    run_id="run-uuid-here",
    key="correction",
    correction={"output": "The correct answer is..."}
)

Annotation Queues

# Create an annotation queue for human review
queue = client.create_annotation_queue(
    name="review-queue",
    description="Runs needing human review"
)

# Add runs to the queue
client.add_runs_to_annotation_queue(
    queue_id=queue.id,
    run_ids=["run-id-1", "run-id-2"]
)

Prompt Hub

from langsmith import Client

# Prompts are pulled and pushed through the LangSmith Client; the
# `langsmith` package does not provide a `hub` module.
client = Client()

# Pull a prompt from the hub
prompt = client.pull_prompt("my-org/my-prompt")

# Push a prompt to the hub
from langchain_core.prompts import ChatPromptTemplate

my_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a {role}."),
    ("user", "{input}")
])
# Pushing an object to an existing prompt records it as a new commit
client.push_prompt("my-org/my-prompt", object=my_prompt)

APIの使用方法

# List projects
curl -s -H "x-api-key: $LANGCHAIN_API_KEY" \
  https://api.smith.langchain.com/api/v1/sessions | python3 -m json.tool

# Get runs for a project
curl -s -H "x-api-key: $LANGCHAIN_API_KEY" \
  "https://api.smith.langchain.com/api/v1/runs?session_name=my-project&limit=10"

# List datasets
curl -s -H "x-api-key: $LANGCHAIN_API_KEY" \
  https://api.smith.langchain.com/api/v1/datasets

Filtering and Querying Runs

# Query runs with filters
runs = client.list_runs(
    project_name="my-project",
    filter='and(eq(status, "error"), gt(latency, 5))',
    limit=50,
)

# Filter by time range
from datetime import datetime, timedelta, timezone
runs = client.list_runs(
    project_name="my-project",
    # Timezone-aware UTC timestamp, so the 24-hour window does not shift
    # with the local clock's UTC offset
    start_time=datetime.now(timezone.utc) - timedelta(hours=24),
    run_type="llm",
)

# Get run statistics
for run in runs:
    print(f"{run.name}: {run.total_tokens} tokens, {run.latency}s")

Deployment with LangServe

# Monitor LangServe deployments with tracing
from langserve import add_routes
from fastapi import FastAPI

app = FastAPI()

# Traces are automatically captured for deployed chains
add_routes(app, chain, path="/chat")