Weave
WeaveはWeights & BiasesのLLMオブザーバビリティおよび評価プラットフォームです。LLM呼び出しの自動トレーシング、構造化された評価パイプライン、データセット管理、AIアプリケーションの反復改善のための比較ツールを提供します。
インストール
# Install the Weave package with pip
pip install weave
# Authenticate with W&B
wandb login
# Or set the API key directly through the WANDB_API_KEY environment variable
export WANDB_API_KEY="your-api-key"
Initialization
import weave
# Initialize Weave for the project named "my-project"
weave.init("my-project")
Tracing with @weave.op()
Basic Function Tracing
import weave
weave.init("my-project")
@weave.op()
def generate_response(prompt: str) -> str:
    """Send ``prompt`` as a single user message to gpt-4o and return the reply text.

    Every call to this function is automatically traced.
    """
    import openai

    chat_messages = [{"role": "user", "content": prompt}]
    completion = openai.OpenAI().chat.completions.create(
        model="gpt-4o",
        messages=chat_messages,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
# This call is traced with its inputs, outputs, and metadata
result = generate_response("What is Weave?")
Nested Tracing
@weave.op()
def retrieve_context(query: str) -> list[str]:
    """Return a fixed pair of placeholder context chunks (simulated retrieval).

    ``query`` is accepted but not used by this stub.
    """
    chunks = ["context chunk 1", "context chunk 2"]
    return chunks
@weave.op()
def format_prompt(query: str, context: list[str]) -> str:
    """Build the prompt: newline-joined context chunks followed by the question."""
    joined_context = "\n".join(context)
    context_section = f"Context:\n{joined_context}"
    question_section = f"Question: {query}"
    return f"{context_section}\n\n{question_section}"
@weave.op()
def rag_pipeline(query: str) -> str:
    """Retrieve context, build a prompt, and generate the answer for ``query``.

    Each step is itself a ``@weave.op()``, so the parent trace automatically
    captures the child spans.
    """
    return generate_response(format_prompt(query, retrieve_context(query)))
Tracing Classes
class MyAssistant(weave.Model):
    """Chat assistant whose model name and temperature are fields of the model."""

    # Name of the chat model passed to the OpenAI API.
    model_name: str
    # Sampling temperature passed to the OpenAI API.
    temperature: float = 0.7

    @weave.op()
    def predict(self, query: str) -> str:
        """Send ``query`` as a single user message and return the reply text."""
        import openai

        request = {
            "model": self.model_name,
            "temperature": self.temperature,
            "messages": [{"role": "user", "content": query}],
        }
        completion = openai.OpenAI().chat.completions.create(**request)
        return completion.choices[0].message.content
# Create an assistant; temperature keeps its declared default of 0.7
assistant = MyAssistant(model_name="gpt-4o")
# Model parameters are tracked alongside each call
result = assistant.predict("Explain Weave tracing")
Datasets
Creating Datasets
# Example rows: each pairs a question with its expected answer
qa_rows = [
    {"question": "What is Weave?", "expected": "An observability tool"},
    {"question": "Who made Weave?", "expected": "Weights & Biases"},
    {"question": "What does @weave.op do?", "expected": "Traces functions"},
]
# Create a dataset from the list of examples
dataset = weave.Dataset(name="qa-pairs", rows=qa_rows)
# Publish the dataset
weave.publish(dataset)
Loading Datasets
# Load a published dataset by reference
dataset_ref = weave.ref("qa-pairs:latest")
dataset = dataset_ref.get()
# Iterate over rows, printing each question with its expected answer
for row in dataset.rows:
    question, expected = row["question"], row["expected"]
    print(question, expected)
評価
Running Evaluations
# Define a scorer function
@weave.op()
def exact_match_scorer(expected: str, model_output: str) -> dict:
    """Report whether ``expected`` occurs in ``model_output``, ignoring case.

    Despite the name, this is a case-insensitive substring check rather than
    strict equality.
    """
    needle = expected.lower()
    haystack = model_output.lower()
    return {"match": needle in haystack}
@weave.op()
def length_scorer(model_output: str) -> dict:
    """Return the output's character count and whether it is under 200 characters."""
    num_chars = len(model_output)
    return {"length": num_chars, "concise": num_chars < 200}
# Create an evaluation over the dataset.
# The dataset rows expose a "question" column while MyAssistant.predict takes
# `query`; model inputs are matched to dataset columns by argument name, so
# map the column onto the argument before the model is called. Scorers still
# receive the original row columns (e.g. "expected").
evaluation = weave.Evaluation(
    dataset=dataset,
    scorers=[exact_match_scorer, length_scorer],
    preprocess_model_input=lambda row: {"query": row["question"]},
)
# Evaluate a model.
# `Evaluation.evaluate` is a coroutine; a bare top-level `await` is a
# SyntaxError in a regular script, so drive it with asyncio.run().
# (Inside a Jupyter notebook, where an event loop is already running, use
# `results = await evaluation.evaluate(assistant)` instead.)
import asyncio

results = asyncio.run(evaluation.evaluate(assistant))
print(results)
Comparing Runs
import asyncio

# Evaluate multiple model variants for comparison
model_a = MyAssistant(model_name="gpt-4o", temperature=0.3)
model_b = MyAssistant(model_name="gpt-4o-mini", temperature=0.7)
# `Evaluation.evaluate` is a coroutine; a bare top-level `await` is a
# SyntaxError in a regular script, so run each evaluation with asyncio.run().
# (Inside a Jupyter notebook, use `await evaluation.evaluate(...)` instead.)
results_a = asyncio.run(evaluation.evaluate(model_a))
results_b = asyncio.run(evaluation.evaluate(model_b))
# Compare results in the Weave UI dashboard
Logging Custom Calls
# Log arbitrary data to a trace
@weave.op()
def process_with_metadata(text: str) -> dict:
    """Return the upper-cased text and its whitespace-separated word count.

    The count is stored under the "tokens" key.
    """
    word_count = len(text.split())
    return {"processed": text.upper(), "tokens": word_count}
Integrations
OpenAI Integration
import weave
from openai import OpenAI
weave.init("openai-tracing")
# OpenAI calls are auto-patched when weave is initialized
client = OpenAI()
greeting = [{"role": "user", "content": "Hello"}]
response = client.chat.completions.create(model="gpt-4o", messages=greeting)
Anthropic Integration
import weave
from anthropic import Anthropic
weave.init("anthropic-tracing")
# Anthropic calls are also auto-traced
client = Anthropic()
greeting = [{"role": "user", "content": "Hello"}]
message = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    messages=greeting,
)
LangChain Integration
import weave
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
weave.init("langchain-tracing")
# LangChain traces are captured automatically
llm = ChatOpenAI(model="gpt-4o")
prompt_template = ChatPromptTemplate.from_template("Tell me about {topic}")
# Pipe the prompt template into the LLM to form the chain
chain = prompt_template | llm
result = chain.invoke({"topic": "Weave"})
Publishing and Versioning
# Publish any object to Weave for versioning
@weave.op()
def my_scorer(output: str) -> dict:
    """Score an output by its length in characters."""
    score = len(output)
    return {"score": score}
# Publish the scorer under the name "my-scorer"
weave.publish(my_scorer, name="my-scorer")
# Retrieve a specific version, and the latest version, by reference
# NOTE(review): the variable is named scorer_v1 but references the "v0"
# alias — confirm which version is intended.
scorer_v1 = weave.ref("my-scorer:v0").get()
scorer_latest = weave.ref("my-scorer:latest").get()
Feedback Collection
# Add feedback to a call via the API
from weave.trace.weave_client import get_weave_client
client = get_weave_client()
# After running a traced function, add feedback.
# `op.call(...)` returns a `(result, call)` tuple rather than the Call alone,
# so unpack it to obtain the Call object that carries the feedback API.
_result, call = generate_response.call("Test prompt")
call.feedback.add_reaction("👍")
call.feedback.add_note("Great response, very accurate")