Instructor is a Python library that makes it easy to extract structured, validated data from LLMs. Built on Pydantic, it patches LLM clients to accept a response_model argument and automatically handles retries, validation, and schema generation.
GitHub: https://github.com/jxnl/instructor
Docs: https://python.useinstructor.com
PyPI: https://pypi.org/project/instructor
Installation
# Core install (OpenAI backend)
pip install instructor
# With optional backends
pip install "instructor[anthropic]" # Anthropic Claude
pip install "instructor[google]" # Google Gemini
pip install "instructor[groq]" # Groq
pip install "instructor[cohere]" # Cohere
pip install "instructor[litellm]" # LiteLLM (100+ providers)
pip install "instructor[vertexai]" # Google Vertex AI
# Full install (all extras)
pip install "instructor[all]"
# Required dependency (auto-installed)
# Quoted so the shell does not treat ">" as an output redirection
# (unquoted, this would install an unpinned pydantic and create a file "=2.0").
pip install "pydantic>=2.0"
Configuration
Provider Patching
import instructor
from openai import OpenAI
from anthropic import Anthropic
import ollama
# OpenAI -- the default mode for this provider is TOOLS.
client = instructor.from_openai(
    OpenAI(api_key="sk-..."),
)

# Anthropic
client = instructor.from_anthropic(
    Anthropic(api_key="sk-ant-..."),
)

# Ollama (local models), reached through the OpenAI client pointed at localhost.
client = instructor.from_openai(
    OpenAI(base_url="http://localhost:11434/v1", api_key="ollama"),
    mode=instructor.Mode.JSON,  # Ollama works best with JSON mode
)

# LiteLLM (unified interface for 100+ providers)
import litellm

client = instructor.from_litellm(litellm.completion)

# Google Gemini
import google.generativeai as genai

client = instructor.from_gemini(
    genai.GenerativeModel("gemini-1.5-flash"),
)
import instructor
# Available modes
# Each line below is a bare reference to one instructor.Mode member, listed for
# reference only. Pass the chosen member as mode=... when patching a client,
# e.g. instructor.from_openai(..., mode=instructor.Mode.JSON).
instructor.Mode.TOOLS # Default for OpenAI (function calling)
instructor.Mode.JSON # JSON mode (model outputs raw JSON)
instructor.Mode.MD_JSON # Extract JSON from markdown code block
instructor.Mode.FUNCTIONS # Legacy OpenAI functions API
instructor.Mode.PARALLEL_TOOLS # Multiple tool calls in one response
instructor.Mode.ANTHROPIC_TOOLS # Anthropic-specific tool use
instructor.Mode.GEMINI_JSON # Gemini JSON mode
Core API
Response Model Patterns
| Pattern | Use Case |
|---|---|
| `response_model=MyModel` | Extract single structured object |
| `response_model=list[MyModel]` | Extract list of objects |
| `response_model=Optional[MyModel]` | Optional extraction (may return None) |
| `response_model=Iterable[MyModel]` | Stream list items as they generate |
| `response_model=Partial[MyModel]` | Stream partial (incomplete) objects |
| `stream=True` + `response_model` | Async streaming with validation |
Retry Configuration
| Parameter | Description | Default |
|---|---|---|
| `max_retries` | Max validation retry attempts | 0 |
| `validation_context` | Extra context passed to validators | None |
| `strict` | Strict Pydantic validation mode | False |
Key Instructor Methods
| Method | Description |
|---|---|
| `client.chat.completions.create(response_model=M)` | Sync structured extraction |
| `await client.chat.completions.create(response_model=M)` | Async structured extraction |
| `instructor.from_openai(client, mode=...)` | Patch OpenAI client |
| `instructor.from_anthropic(client)` | Patch Anthropic client |
| `instructor.patch(client)` | Legacy patch (still works) |
| `instructor.dsl.Partial[Model]` | Streaming partial model type |
Advanced Usage
import instructor
from openai import OpenAI
from pydantic import BaseModel, Field
# Wrap a default-configured OpenAI client so that chat.completions.create()
# accepts the response_model argument used in the examples below.
client = instructor.from_openai(OpenAI())
# One person mentioned in the input text.
# age is constrained to 0..150 by the Field bounds; email defaults to None
# when the text does not mention one.
class Person(BaseModel):
    name: str = Field(description="Full name of the person")
    age: int = Field(ge=0, le=150, description="Age in years")
    email: str | None = Field(default=None, description="Email address if mentioned")
# Extract exactly one Person from a short free-text sentence.
person = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {
            "role": "user",
            "content": "John Doe is 30 years old. Email: john@example.com",
        },
    ],
    response_model=Person,
)
print(person.name)  # "John Doe"
print(person.age)  # 30
print(person.email)  # "john@example.com"
# List extraction
# Wrapper model whose single field holds every Person found in the text.
class People(BaseModel):
    people: list[Person]
# Ask for all people in one call by using the People wrapper as response_model.
result = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Alice is 25, Bob is 30, Carol is 28."}],
    response_model=People,
)
# Print one "name: age" line per extracted person.
for p in result.people:
    print(f"{p.name}: {p.age}")
Validation and Retry Logic
from pydantic import BaseModel, field_validator, model_validator
import instructor
# Patch an OpenAI client, selecting TOOLS mode explicitly.
client = instructor.from_openai(OpenAI(), mode=instructor.Mode.TOOLS)
# An e-mail address together with the domain derived from it.
class EmailAddress(BaseModel):
    email: str
    domain: str

    @field_validator("email")
    @classmethod
    def validate_email(cls, v: str) -> str:
        """Reject values without an "@"; return the rest lower-cased.

        Raises:
            ValueError: if ``v`` contains no "@" character.
        """
        if "@" not in v:
            raise ValueError(f"'{v}' is not a valid email address")
        return v.lower()

    @model_validator(mode="after")
    def extract_domain(self) -> "EmailAddress":
        """Overwrite ``domain`` with the piece of ``email`` after its first "@"."""
        # split("@")[1] takes the segment after the first "@" (up to any later one).
        self.domain = self.email.split("@")[1]
        return self
# With max_retries=3, Instructor retries up to 3 times when validation fails.
result = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": "My contact is john at example dot com",
        },
    ],
    response_model=EmailAddress,
    max_retries=3,  # Retry with validation error feedback
)
print(result.email)  # "john@example.com"
print(result.domain)  # "example.com"
Nested and Complex Models
from pydantic import BaseModel
from enum import Enum
from datetime import date
# Closed set of priority levels. Mixing in str makes each member compare
# equal to its lowercase string value.
class Priority(str, Enum):
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
# A single unit of work inside a project plan.
# due_date and assignee default to None; subtasks defaults to an empty list.
class Task(BaseModel):
    title: str
    description: str
    priority: Priority
    due_date: date | None = None
    assignee: str | None = None
    subtasks: list[str] = []
# Top-level model: a named project made of nested Task objects plus an
# overall effort estimate in days.
class ProjectPlan(BaseModel):
    project_name: str
    tasks: list[Task]
    total_estimated_days: int
# Request a nested ProjectPlan (project -> tasks -> priority enum) in one call.
plan = client.chat.completions.create(
    model="gpt-4o",
    messages=[{
        "role": "user",
        "content": "Create a 3-task project plan for building a REST API.",
    }],
    response_model=ProjectPlan,
)
# Priority subclasses str, so .upper() acts on the member's string value.
for task in plan.tasks:
    print(f"[{task.priority.upper()}] {task.title}")
Streaming Partial Responses
from instructor.dsl.partial import Partial
# Target model for the streaming example: a titled report with a summary
# and two lists of strings.
class Report(BaseModel):
    title: str
    summary: str
    key_findings: list[str]
    recommendations: list[str]
# Stream partial model as it generates.
# create_partial() wraps the model in Partial[...] itself, so pass the plain
# Report class here; Partial[Report] is only for create(..., stream=True).
for partial_report in client.chat.completions.create_partial(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Analyze the state of AI in 2025."}],
    response_model=Report,
):
    # partial_report fills in progressively; fields not yet received are
    # falsy, hence the guards before printing.
    if partial_report.title:
        print(f"Title: {partial_report.title}")
    if partial_report.key_findings:
        print(f"Findings so far: {len(partial_report.key_findings)}")
Async Usage
import asyncio
import instructor
from openai import AsyncOpenAI
# Patch the asynchronous OpenAI client; its create() call is awaited below.
async_client = instructor.from_openai(AsyncOpenAI())
# Result of a sentiment analysis: a label, a score, and a justification.
class Sentiment(BaseModel):
    label: str  # "positive", "negative", "neutral"
    score: float  # 0.0 to 1.0
    reason: str
async def analyze(text: str) -> Sentiment:
    """Return the Sentiment extracted for ``text``.

    Sends one user message ("Analyze sentiment: <text>") through the patched
    async client and awaits the structured result.
    """
    return await async_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": f"Analyze sentiment: {text}"}],
        response_model=Sentiment,
    )
async def batch_analyze(texts: list[str]) -> list[Sentiment]:
    """Analyze all ``texts`` concurrently.

    One ``analyze`` coroutine is created per text and awaited together with
    ``asyncio.gather``, which returns results in the order of its arguments.
    """
    return await asyncio.gather(*[analyze(t) for t in texts])
# Drive the async batch from synchronous code with a fresh event loop.
results = asyncio.run(
    batch_analyze(
        [
            "I love this product!",
            "This is terrible.",
            "It's okay I guess.",
        ]
    )
)
Multi-Provider with Anthropic
import instructor
from anthropic import Anthropic
# Patch an Anthropic client; the example below calls client.messages.create().
client = instructor.from_anthropic(Anthropic(api_key="sk-ant-..."))
# Structured code-review result; score is constrained to 1..10 by the Field bounds.
class CodeReview(BaseModel):
    score: int = Field(ge=1, le=10)
    issues: list[str]
    suggestions: list[str]
    overall_verdict: str
# Request a CodeReview from Claude via the patched Anthropic client.
review = client.messages.create(
    model="claude-opus-4-5",
    max_tokens=1024,
    messages=[{
        "role": "user",
        # Plain string: the text has no placeholders, so no f-prefix is needed.
        "content": "Review this code:\n```python\ndef add(a, b):\n return a + b\n```",
    }],
    response_model=CodeReview,
)
print(f"Score: {review.score}/10")
Using with Ollama
import instructor
from openai import OpenAI
# Point the OpenAI client at the local Ollama server and patch it in JSON mode.
client = instructor.from_openai(
    OpenAI(
        base_url="http://localhost:11434/v1",
        api_key="ollama",
    ),
    mode=instructor.Mode.JSON,
)
# A joke split into its setup, its punchline, and the topic it is about.
class Joke(BaseModel):
    setup: str
    punchline: str
    topic: str
# Ask the local llama3.2 model for a Joke and print it as a Q/A pair.
joke = client.chat.completions.create(
    model="llama3.2",
    messages=[
        {"role": "user", "content": "Tell me a programming joke."},
    ],
    response_model=Joke,
)
print(f"Q: {joke.setup}\nA: {joke.punchline}")
Common Workflows
from typing import Optional
from pydantic import BaseModel
# Facts about a company. Only name is required; every other field has a
# default (None or an empty list) for when the text does not state it.
class CompanyInfo(BaseModel):
    name: str
    founded_year: Optional[int] = None
    headquarters: Optional[str] = None
    ceo: Optional[str] = None
    products: list[str] = []
    revenue_usd_billions: Optional[float] = None
def extract_company_info(text: str) -> CompanyInfo:
    """Extract a CompanyInfo record from ``text``.

    The system message tells the model to include only explicitly stated
    information; ``max_retries=2`` allows up to two validation retries.
    """
    return client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "Extract company information from text. Only include information explicitly stated."},
            {"role": "user", "content": text},
        ],
        response_model=CompanyInfo,
        max_retries=2,
    )
Classification Pipeline
from enum import Enum
# Closed set of support-ticket categories. Mixing in str makes each member
# compare equal to its string value.
class TicketCategory(str, Enum):
    BUG = "bug"
    FEATURE = "feature_request"
    QUESTION = "question"
    BILLING = "billing"
    OTHER = "other"
# Classification result for one support ticket. summary is capped at 100
# characters by the Field constraint.
class SupportTicket(BaseModel):
    category: TicketCategory
    priority: Priority
    summary: str = Field(max_length=100)
    requires_human: bool
def classify_ticket(ticket_text: str) -> SupportTicket:
    """Classify ``ticket_text`` into a SupportTicket.

    Sends a single user message containing the ticket text and returns the
    structured result produced for ``response_model=SupportTicket``.
    """
    return client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": f"Classify this support ticket:\n{ticket_text}"}],
        response_model=SupportTicket,
    )
Tips and Best Practices
| Topic | Recommendation |
|---|---|
| Field descriptions | Use Field(description=...) — it gets included in the schema sent to the LLM |
| Optional fields | Mark uncertain fields as Optional[T] = None to avoid hallucination |
| Retry count | Set max_retries=3 for complex/validated models; 0 for simple extractions |
| Model choice | GPT-4o for complex nested models; GPT-4o-mini or Claude Haiku for simple extractions |
| Mode selection | Use Mode.TOOLS for OpenAI, Mode.JSON for Ollama, Mode.ANTHROPIC_TOOLS for Claude |
| Streaming | Use create_partial() to show progressive results to users |
| Enum types | Always use str enums (class X(str, Enum)) for reliable LLM serialization |
| List length | Add Field(min_length=1, max_length=10) to list fields to prevent runaway lists |
| Validation errors | Instructor sends validation errors back to the LLM as feedback for retries |
| Async | Use AsyncOpenAI + asyncio.gather() for concurrent extractions |
| Cost | Each retry costs tokens; add validators only where data quality truly matters |
| Debugging | Set INSTRUCTOR_LOGGING=debug env var to see request/response payloads |