
LangChain Framework Cheat Sheet

Overview

LangChain is a comprehensive framework for developing applications powered by large language models (LLMs), designed to simplify every stage of the LLM application lifecycle from development to deployment. Created to address the complexity of building production-ready AI applications, LangChain provides a unified interface for working with various LLMs while offering powerful abstractions for chaining operations, managing context, and integrating with external systems.

What makes LangChain particularly powerful is its modular architecture that allows developers to compose complex AI workflows using simple, reusable components. The framework excels at creating context-aware, reasoning applications that can connect LLMs to private data sources, external APIs, and specialized tools. With its LangChain Expression Language (LCEL), developers can build sophisticated chains that handle everything from simple question-answering to complex multi-step reasoning and autonomous agent behaviors.

LangChain has become the de facto standard for LLM application development, supporting integration with virtually every major LLM provider and offering extensive tooling for production deployment, monitoring, and evaluation through its companion platform LangSmith.

Installation and Setup

Basic Installation

# Install core LangChain
pip install langchain

# Install with specific integrations
pip install langchain-openai
pip install langchain-anthropic
pip install langchain-google-genai
pip install langchain-community

# Install additional components
pip install langchain-experimental
pip install langsmith  # For monitoring and evaluation
pip install langgraph  # For advanced agent workflows

Environment Configuration

import os
from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic

# Set up API keys
os.environ["OPENAI_API_KEY"] = "your-openai-api-key"
os.environ["ANTHROPIC_API_KEY"] = "your-anthropic-api-key"
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = "your-langsmith-api-key"

# Initialize LLMs
openai_llm = ChatOpenAI(model="gpt-4", temperature=0.7)
anthropic_llm = ChatAnthropic(model="claude-3-sonnet-20240229")

Project Structure

langchain_project/
├── chains/
│   ├── __init__.py
│   ├── qa_chain.py
│   └── summarization_chain.py
├── agents/
│   ├── __init__.py
│   ├── research_agent.py
│   └── analysis_agent.py
├── tools/
│   ├── __init__.py
│   ├── search_tools.py
│   └── database_tools.py
├── prompts/
│   ├── __init__.py
│   └── templates.py
├── memory/
│   ├── __init__.py
│   └── custom_memory.py
├── config/
│   ├── __init__.py
│   └── settings.py
└── main.py
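
A minimal sketch of what config/settings.py might contain (the module path and variable names are assumptions based on the tree above, not part of LangChain itself):

# config/settings.py
import os

# Central place for model and API settings shared by chains, agents, and tools
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", "gpt-3.5-turbo")
DEFAULT_TEMPERATURE = float(os.environ.get("DEFAULT_TEMPERATURE", "0.7"))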

LangChain Expression Language (LCEL)

Basic Chain Construction

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

# Create a simple chain using LCEL
prompt = ChatPromptTemplate.from_template("Tell me a joke about \\\\{topic\\\\}")
llm = ChatOpenAI(model="gpt-3.5-turbo")
output_parser = StrOutputParser()

# Chain components using the pipe operator
chain = prompt | llm | output_parser

# Execute the chain
result = chain.invoke({"topic": "programming"})
print(result)

Complex Chain Composition

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_openai import ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# RAG (Retrieval Augmented Generation) chain
vectorstore = FAISS.from_texts(
    ["LangChain is a framework for LLM applications",
     "LCEL is the expression language for LangChain"],
    embedding=OpenAIEmbeddings()
)
retriever = vectorstore.as_retriever()

# Complex chain with parallel processing
rag_chain = (
    RunnableParallel({
        "context": retriever,
        "question": RunnablePassthrough()
    })
    | ChatPromptTemplate.from_template("""
        Answer the question based on the context:
        Context: {context}
        Question: {question}
        Answer:
    """)
    | ChatOpenAI()
    | StrOutputParser()
)

result = rag_chain.invoke("What is LangChain?")

Conditional Logic in Chains

from langchain_core.runnables import RunnableBranch
from langchain_core.prompts import ChatPromptTemplate

# Conditional chain execution
def route_question(info):
    if "math" in info["question"].lower():
        return "math"
    elif "history" in info["question"].lower():
        return "history"
    else:
        return "general"

# Different prompts for different question types
math_prompt = ChatPromptTemplate.from_template(
    "You are a math expert. Solve this problem: {question}"
)
history_prompt = ChatPromptTemplate.from_template(
    "You are a history expert. Answer this question: {question}"
)
general_prompt = ChatPromptTemplate.from_template(
    "Answer this general question: {question}"
)

# Branching chain
branching_chain = RunnableBranch(
    (lambda x: route_question(x) == "math", math_prompt|llm),
    (lambda x: route_question(x) == "history", history_prompt|llm),
    general_prompt|llm  # Default branch
)

result = branching_chain.invoke({"question": "What is 2+2?"})

Streaming and Async Support

import asyncio
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-3.5-turbo")
prompt = ChatPromptTemplate.from_template("Write a story about \\\\{topic\\\\}")
chain = prompt|llm

# Streaming execution
for chunk in chain.stream({"topic": "space exploration"}):
    print(chunk.content, end="", flush=True)

# Async execution
async def async_chain_execution():
    result = await chain.ainvoke({"topic": "artificial intelligence"})
    return result
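
# Run the async helper from a synchronous context (assumes no event loop is already running)
async_result = asyncio.run(async_chain_execution())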

# Batch processing
batch_inputs = [
    {"topic": "robots"},
    {"topic": "future"},
    {"topic": "technology"}
]
batch_results = chain.batch(batch_inputs)

Prompts and Templates

Basic Prompt Templates

from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.prompts import HumanMessagePromptTemplate, SystemMessagePromptTemplate

# Simple prompt template
simple_prompt = PromptTemplate(
    input_variables=["product"],
    template="What is a good name for a company that makes \\\\{product\\\\}?"
)

# Chat prompt template
chat_prompt = ChatPromptTemplate.from_messages([
    SystemMessagePromptTemplate.from_template(
        "You are a helpful assistant that translates \\\\{input_language\\\\} to \\\\{output_language\\\\}."
    ),
    HumanMessagePromptTemplate.from_template("\\\\{text\\\\}")
])

# Using the templates
formatted_prompt = simple_prompt.format(product="colorful socks")
chat_messages = chat_prompt.format_messages(
    input_language="English",
    output_language="French",
    text="I love programming"
)

Advanced Prompt Engineering

from langchain_core.prompts import ChatPromptTemplate, FewShotPromptTemplate, PromptTemplate
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

# Few-shot prompting
examples = [
    {"input": "happy", "output": "sad"},
    {"input": "tall", "output": "short"},
    {"input": "energetic", "output": "lethargic"}
]

example_prompt = PromptTemplate(
    input_variables=["input", "output"],
    template="Input: {input}\nOutput: {output}"
)

# Semantic similarity example selector
example_selector = SemanticSimilarityExampleSelector.from_examples(
    examples,
    OpenAIEmbeddings(),
    FAISS,
    k=2
)

few_shot_prompt = FewShotPromptTemplate(
    example_selector=example_selector,
    example_prompt=example_prompt,
    prefix="Give the antonym of every input",
    suffix="Input: {adjective}\nOutput:",
    input_variables=["adjective"]
)

# Dynamic prompt selection
# (long_text_prompt and short_text_prompt are assumed to be defined elsewhere)
def select_prompt_based_on_input(input_text):
    if len(input_text.split()) > 50:
        return long_text_prompt
    else:
        return short_text_prompt

dynamic_chain = (
    RunnablePassthrough.assign(
        prompt=lambda x: select_prompt_based_on_input(x["text"])
    )
    | (lambda x: x["prompt"].format(**x))
    | llm
)

Prompt Composition and Reuse

from langchain_core.prompts import ChatPromptTemplate

# Reusable prompt components
system_context = """You are an expert \\\\{domain\\\\} consultant with over 10 years of experience.
You provide detailed, actionable advice based on industry best practices."""

analysis_template = """Analyze the following \\\\{item_type\\\\}:

\\\\{content\\\\}

Please provide:
1. Key strengths and weaknesses
2. Recommendations for improvement
3. Industry benchmarks and comparisons
4. Next steps and action items"""

# Compose prompts dynamically
def create_analysis_prompt(domain, item_type):
    return ChatPromptTemplate.from_messages([
        ("system", system_context.format(domain=domain)),
        ("human", analysis_template.format(item_type=item_type, content="\\\\{content\\\\}"))
    ])

# Create specialized prompts
marketing_prompt = create_analysis_prompt("marketing", "campaign")
financial_prompt = create_analysis_prompt("finance", "budget")
technical_prompt = create_analysis_prompt("technology", "architecture")
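
A short usage sketch for one of the composed prompts (assumes the llm and StrOutputParser shown in earlier sections):

from langchain_core.output_parsers import StrOutputParser

marketing_chain = marketing_prompt | llm | StrOutputParser()
report = marketing_chain.invoke({"content": "Example campaign summary to analyze"})
print(report)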

Memory Management

Basic Memory Types

from langchain.memory import ConversationBufferMemory, ConversationSummaryMemory
from langchain.memory import ConversationBufferWindowMemory, ConversationTokenBufferMemory
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-3.5-turbo")

# Buffer memory - stores all conversation history
buffer_memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)

# Window memory - keeps only last k interactions
window_memory = ConversationBufferWindowMemory(
    k=5,
    memory_key="chat_history",
    return_messages=True
)

# Summary memory - summarizes old conversations
summary_memory = ConversationSummaryMemory(
    llm=llm,
    memory_key="chat_history",
    return_messages=True
)

# Token buffer memory - limits by token count
token_memory = ConversationTokenBufferMemory(
    llm=llm,
    max_token_limit=1000,
    memory_key="chat_history",
    return_messages=True
)

Custom Memory Implementation

from langchain.memory.chat_memory import BaseChatMemory
from langchain_core.messages import BaseMessage
from typing import List, Dict, Any
from datetime import datetime
import json

class DatabaseChatMemory(BaseChatMemory):
    """Custom memory that persists to database"""

    def __init__(self, session_id: str, connection_string: str):
        super().__init__()
        self.session_id = session_id
        self.connection_string = connection_string
        self.setup_database()

    def setup_database(self):
        # Initialize database connection and tables
        pass

    def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
        # Save conversation to database
        # (save_to_database is assumed to be implemented elsewhere)
        conversation_data = {
            "session_id": self.session_id,
            "inputs": inputs,
            "outputs": outputs,
            "timestamp": datetime.now()
        }
        self.save_to_database(conversation_data)

    def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        # Load conversation history from database
        # (load_from_database is assumed to be implemented elsewhere)
        history = self.load_from_database(self.session_id)
        return {"chat_history": history}

    def clear(self) -> None:
        # Clear conversation history
        # (clear_database_session is assumed to be implemented elsewhere)
        self.clear_database_session(self.session_id)

# Usage
custom_memory = DatabaseChatMemory(
    session_id="user_123",
    connection_string="postgresql://localhost/chatbot"
)

Memory with LCEL Chains

from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain.memory import ConversationBufferMemory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Memory integration with LCEL
memory = ConversationBufferMemory(
    memory_clave="chat_history",
    return_messages=True
)

prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant"),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "\\\\{input\\\\}")
])

def load_memory(input_dict):
    return memory.load_memory_variables({})

def save_memory(input_dict, output):
    memory.save_context(
        {"input": input_dict["input"]},
        {"output": output.content}
    )
    return output

# Chain with memory
memory_chain = (
    RunnablePassthrough.assign(
        chat_history=RunnableLambda(load_memory)
    )
    | prompt
    | llm
    | RunnableLambda(lambda output: save_memory({"input": "current_input"}, output))
)

Persistent Memory Storage

import pickle
import os
from langchain.memory import ConversationBufferMemory

class PersistentMemory:
    def __init__(self, file_path: str):
        self.file_path = file_path
        self.memory = self.load_memory()

    def load_memory(self):
        if os.path.exists(self.file_path):
            with open(self.file_path, 'rb') as f:
                return pickle.load(f)
        else:
            return ConversationBufferMemory(
                memory_clave="chat_history",
                return_messages=True
            )

    def save_memory(self):
        with open(self.file_path, 'wb') as f:
            pickle.dump(self.memory, f)

    def add_message(self, inputs, outputs):
        self.memory.save_context(inputs, outputs)
        self.save_memory()

    def get_memory_variables(self):
        return self.memory.load_memory_variables({})

    def clear_memory(self):
        self.memory.clear()
        self.save_memory()

# Usage
persistent_memory = PersistentMemory("./chat_memory.pkl")
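
# Example: record one exchange and inspect the accumulated history
persistent_memory.add_message(
    {"input": "Hello"},
    {"output": "Hi there! How can I help?"}
)
print(persistent_memory.get_memory_variables())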

Tools and Function Calling

Built-in Tools

from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_community.tools import ShellTool
from langchain_community.tools.file_management import WriteFileTool, ReadFileTool

# Search tools
search_tool = DuckDuckGoSearchRun()
wikipedia_tool = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())

# File management tools
write_tool = WriteFileTool()
read_tool = ReadFileTool()

# System tools
shell_tool = ShellTool()

# Using tools directly
search_result = search_tool.run("LangChain framework")
wiki_result = wikipedia_tool.run("artificial intelligence")

Custom Tool Development

from langchain.tools import BaseTool
from typing import Optional, Type
from pydantic import BaseModel, Field
import os
import requests

class WeatherInput(BaseModel):
    location: str = Field(description="Location to get weather for")

class WeatherTool(BaseTool):
    name: str = "weather_tool"
    description: str = "Get current weather information for a location"
    args_schema: Type[BaseModel] = WeatherInput

    def _run(self, location: str) -> str:
        # Implement weather API call
        try:
            api_key = os.getenv("WEATHER_API_KEY")
            url = f"http://api.openweathermap.org/data/2.5/weather?q={location}&appid={api_key}"
            response = requests.get(url)
            data = response.json()

            if response.status_code == 200:
                temp = data['main']['temp'] - 273.15  # Convert from Kelvin
                description = data['weather'][0]['description']
                return f"Weather in {location}: {temp:.1f}°C, {description}"
            else:
                return f"Could not get weather for {location}"
        except Exception as e:
            return f"Error getting weather: {str(e)}"

    async def _arun(self, location: str) -> str:
        # Async implementation
        return self._run(location)

# Database query tool
class DatabaseTool(BaseTool):
    name: str = "database_query"
    description: str = "Execute SQL queries against the database"

    def _run(self, query: str) -> str:
        # Implement database query logic
        try:
            # Connect to database and execute the query
            # (execute_sql_query is assumed to be implemented elsewhere)
            result = execute_sql_query(query)
            return f"Query result: {result}"
        except Exception as e:
            return f"Database error: {str(e)}"

# API integration tool
class APITool(BaseTool):
    name: str = "api_call"
    description: str = "Make HTTP requests to external APIs"

    def _run(self, endpoint: str, method: str = "GET", data: dict = None) -> str:
        try:
            if method.upper() == "GET":
                response = requests.get(endpoint)
            elif method.upper() == "POST":
                response = requests.post(endpoint, json=data)
            else:
                return f"Unsupported method: {method}"

            return response.json()
        except Exception as e:
            return f"API error: {str(e)}"

Tool Integration with Chains

from langchain.tools import Tool
from langchain_core.prompts import ChatPromptTemplate
from langchain.agents import create_tool_calling_agent, AgentExecutor

# Create tools list
tools = [
    WeatherTool(),
    DatabaseTool(),
    APITool(),
    search_tool,
    wikipedia_tool
]

# Tool-calling chain
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant with access to various tools."),
    ("human", "\\\\{input\\\\}"),
    ("placeholder", "\\\\{agent_scratchpad\\\\}")
])

# Create agent with tools
agent = create_tool_calling_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

# Execute with tools
result = agent_executor.invoke({
    "input": "What's the weather in New York and find information about climate change?"
})

Agents and Autonomous Behavior

Basic Agent Types

from langchain.agents import create_react_agent, create_openai_functions_agent
from langchain.agents import AgentExecutor
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# ReAct agent (Reasoning and Acting)
react_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant"),
    ("human", "\\\\{input\\\\}"),
    MessagesPlaceholder(variable_name="agent_scratchpad")
])

react_agent = create_react_agent(llm, tools, react_prompt)
react_executor = AgentExecutor(
    agent=react_agent,
    tools=tools,
    verbose=True,
    max_iterations=5
)

# OpenAI Functions agent
functions_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant"),
    ("human", "\\\\{input\\\\}"),
    MessagesPlaceholder(variable_name="agent_scratchpad")
])

functions_agent = create_openai_functions_agent(llm, tools, functions_prompt)
functions_executor = AgentExecutor(
    agent=functions_agent,
    tools=tools,
    verbose=True
)

Custom Agent Implementation

from langchain.agents import BaseSingleActionAgent
from langchain_core.agents import AgentAction, AgentFinish
from typing import List, Union, Any, Dict

class CustomAgent(BaseSingleActionAgent):
    """Custom agent with specialized logic"""

    def __init__(self, llm, tools, prompt):
        self.llm = llm
        self.tools = tools
        self.prompt = prompt

    @property
    def input_keys(self):
        return ["input"]

    def plan(
        self,
        intermediate_steps: List[tuple],
        callbacks=None,
        **kwargs: Any
    ) -> Union[AgentAction, AgentFinish]:
        # Custom planning logic
        full_inputs = self.get_full_inputs(intermediate_steps, **kwargs)
        full_output = self.llm.predict(full_inputs)

        # Parse output to determine action
        if "Final Answer:" in full_output:
            return AgentFinish(
                return_values=\\\\{"output": full_output.split("Final Answer:")[-1].strip()\\\\},
                log=full_output
            )
        else:
            # Extract action and action input
            action = self.extract_action(full_output)
            return AgentAction(
                tool=action["tool"],
                tool_input=action["input"],
                log=full_output
            )

    def get_full_inputs(self, intermediate_steps, **kwargs):
        # Build the full input string
        thoughts = ""
        for action, observation in intermediate_steps:
            thoughts += f"Action: \\\\{action.tool\\\\}\n"
            thoughts += f"Action Input: \\\\{action.tool_input\\\\}\n"
            thoughts += f"Observation: \\\\{observation\\\\}\n"

        return self.prompt.format(
            input=kwargs["input"],
            agent_scratchpad=thoughts
        )

    def extract_action(self, text):
        # Parse the LLM output to extract action
        # Implementation depends on your prompt format
        pass

# Use custom agent
custom_executor = AgentExecutor(
    agent=CustomAgent(llm, tools, custom_prompt),
    tools=tools,
    verbose=True
)

Multi-Agent Systems

from langchain.agents import AgentExecutor, create_openai_functions_agent
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Research agent
research_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a research specialist. Focus on gathering comprehensive information."),
    ("human", "\\\\{input\\\\}"),
    MessagesPlaceholder(variable_name="agent_scratchpad")
])

research_agent = create_openai_functions_agent(llm, [search_tool, wikipedia_tool], research_prompt)
research_executor = AgentExecutor(agent=research_agent, tools=[search_tool, wikipedia_tool])

# Analysis agent
analysis_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are an analysis specialist. Focus on interpreting and analyzing information."),
    ("human", "\\\\{input\\\\}"),
    MessagesPlaceholder(variable_name="agent_scratchpad")
])

analysis_agent = create_openai_functions_agent(llm, [DatabaseTool()], analysis_prompt)
analysis_executor = AgentExecutor(agent=analysis_agent, tools=[DatabaseTool()])

# Coordinator agent
def multi_agent_workflow(query):
    # Step 1: Research
    research_result = research_executor.invoke({"input": f"Research: {query}"})

    # Step 2: Analysis
    analysis_input = f"Analyze this research: {research_result['output']}"
    analysis_result = analysis_executor.invoke({"input": analysis_input})

    # Step 3: Synthesis
    synthesis_prompt = f"""
    Based on the research and analysis below, provide a comprehensive answer:

    Research: {research_result['output']}
    Analysis: {analysis_result['output']}

    Original Question: {query}
    """

    final_result = llm.invoke(synthesis_prompt)
    return final_result.content

# Execute multi-agent workflow
result = multi_agent_workflow("What are the latest trends in AI?")

Retrieval Augmented Generation (RAG)

Basic RAG Implementation

from langchain_community.document_loaders import TextLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Load and process documents
loader = TextLoader("documents.txt")
documents = loader.load()

# Split documents
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)
splits = text_splitter.split_documents(documents)

# Create embeddings and vector store
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(splits, embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

# RAG prompt
rag_prompt = ChatPromptTemplate.from_template("""
Answer the question based only on the following context:

{context}

Question: {question}

Answer:
""")

# RAG chain
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
    | StrOutputParser()
)

# Query the RAG system
result = rag_chain.invoke("What is the main topic of the documents?")

Advanced RAG with Reranking

from langchain_community.document_transformers import EmbeddingsRedundantFilter
from langchain.retrievers.document_compressors import EmbeddingsFilter
from langchain.retrievers import ContextualCompressionRetriever

# Create compression retriever with reranking
embeddings_filter = EmbeddingsFilter(
    embeddings=embeddings,
    similarity_threshold=0.76
)

compression_retriever = ContextualCompressionRetriever(
    base_compressor=embeddings_filter,
    base_retriever=retriever
)

# Advanced RAG with multiple retrieval strategies
def multi_retrieval_rag(question):
    # Similarity search
    similarity_docs = vectorstore.similarity_search(question, k=3)

    # MMR (Maximum Marginal Relevance) search
    mmr_docs = vectorstore.max_marginal_relevance_search(question, k=3)

    # Combine and deduplicate
    all_docs = similarity_docs + mmr_docs
    unique_docs = list({doc.page_content: doc for doc in all_docs}.values())

    # Format context
    context = format_docs(unique_docs)

    # Generate answer
    prompt = rag_prompt.format(context=context, question=question)
    response = llm.invoke(prompt)

    return response.content

result = multi_retrieval_rag("What are the key benefits mentioned?")

RAG with Metadata Filtering

from langchain_core.documents import Document

# Documents with metadata
documents_with_metadata = [
    Document(
        page_content="LangChain is a framework for LLM applications",
        metadata=\\\\{"source": "documentación", "category": "framework", "date": "2024"\\\\}
    ),
    Document(
        page_content="RAG combines retrieval with generation",
        metadata=\\\\{"source": "tutorial", "category": "technique", "date": "2024"\\\\}
    )
]

# Create vector store with metadata
vectorstore_with_metadata = FAISS.from_documents(
    documents_with_metadata,
    embeddings
)

# Filtered retrieval
def filtered_rag(question, filter_criteria):
    # Retrieve with metadata filtering
    docs = vectorstore_with_metadata.similarity_search(
        question,
        k=5,
        filter=filter_criteria
    )

    context = format_docs(docs)
    prompt = rag_prompt.format(context=context, question=question)
    response = llm.invoke(prompt)

    return response.content

# Query with filtering
result = filtered_rag(
    "What is LangChain?",
    \\\\{"category": "framework"\\\\}
)

Production Deployment and Monitoring

LangSmith Integration

import os
from langsmith import Client
from langchain.callbacks import LangChainTracer

# Set up LangSmith
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_clave"] = "your-langsmith-api-clave"
os.environ["LANGCHAIN_PROJECT"] = "your-project-name"

# Initialize client
client = Client()

# Chain with tracing
traced_chain = (
    prompt
    | llm.with_config({"tags": ["production", "v1.0"]})
    | StrOutputParser()
)

# Execute with automatic tracing
result = traced_chain.invoke(
    \\\\{"input": "test query"\\\\},
    config=\\\\{"metadata": \\\\{"user_id": "user123", "sesión_id": "sesión456"\\\\}\\\\}
)

Custom Callbacks and Monitoring

from langchain.callbacks.base import BaseCallbackHandler
from typing import Dict, Any, List
import logging
import time

class CustomCallbackHandler(BaseCallbackHandler):
    """Custom callback for monitoring and logging"""

    def __init__(self):
        self.start_time = None
        self.logger = logging.getLogger(__name__)

    def on_chain_start(
        self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
    ) -> Any:
        self.start_time = time.time()
        self.logger.info(f"Chain started with inputs: \\\\{inputs\\\\}")

    def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> Any:
        duration = time.time() - self.start_time
        self.logger.info(f"Chain completed in \\\\{duration:.2f\\\\}s")
        self.logger.info(f"Outputs: \\\\{outputs\\\\}")

    def on_chain_error(self, error: Exception, **kwargs: Any) -> Any:
        self.logger.error(f"Chain error: \\\\{error\\\\}")

    def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> Any:
        self.logger.info(f"LLM called with prompts: \\\\{prompts\\\\}")

    def on_llm_end(self, response, **kwargs: Any) -> Any:
        self.logger.info(f"LLM response: \\\\{response\\\\}")

# Use custom callback
custom_callback = CustomCallbackHandler()
monitored_chain = chain.with_config({"callbacks": [custom_callback]})

Error Handling and Resilience

from langchain_core.runnables import RunnableRetry
from langchain.schema import OutputParserException
import time

# Retry configuration
retry_chain = RunnableRetry(
    bound=chain,
    max_attempt_number=3,
    wait_exponential_jitter=True
)
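
# Alternative (a minimal sketch): every runnable also exposes .with_retry(),
# which adds equivalent retry behavior in a single call
retry_chain_alt = chain.with_retry(stop_after_attempt=3)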

# Custom error handling
def handle_chain_errors(func):
    def wrapper(*args, **kwargs):
        max_retries = 3
        for attempt in range(max_retries):
            try:
                return func(*args, **kwargs)
            except OutputParserException as e:
                print(f"Parsing error on attempt \\\\{attempt + 1\\\\}: \\\\{e\\\\}")
                if attempt == max_retries - 1:
                    raise
                time.sleep(2 ** attempt)
            except Exception as e:
                print(f"Unexpected error: \\\\{e\\\\}")
                if attempt == max_retries - 1:
                    raise
                time.sleep(2 ** attempt)
    return wrapper

@handle_chain_errors
def robust_chain_execution(input_data):
    return chain.invoke(input_data)

Caching and Performance Optimization

from langchain.cache import InMemoryCache, SQLiteCache
from langchain.globals import set_llm_cache
import sqlite3

# In-memory caching
set_llm_cache(InMemoryCache())

# SQLite caching for persistence
set_llm_cache(SQLiteCache(database_path=".langchain.db"))

# Custom caching implementation
class RedisCache:
    def __init__(self, redis_client):
        self.redis_client = redis_client

    def lookup(self, prompt, llm_string):
        key = f"{llm_string}:{hash(prompt)}"
        return self.redis_client.get(key)

    def update(self, prompt, llm_string, return_val):
        key = f"{llm_string}:{hash(prompt)}"
        self.redis_client.set(key, return_val, ex=3600)  # 1 hour expiry

# Performance monitoring
class PerformanceMonitor:
    def __init__(self):
        self.metrics = {}

    def track_execution(self, chain_name, execution_time, token_count):
        if chain_name not in self.metrics:
            self.metrics[chain_name] = []

        self.metrics[chain_name].append({
            "execution_time": execution_time,
            "token_count": token_count,
            "timestamp": time.time()
        })

    def get_average_metrics(self, chain_name):
        if chain_name not in self.metrics:
            return None

        data = self.metrics[chain_name]
        avg_time = sum(d["execution_time"] for d in data) / len(data)
        avg_tokens = sum(d["token_count"] for d in data) / len(data)

        return \\\\{"avg_time": avg_time, "avg_tokens": avg_tokens\\\\}

monitor = PerformanceMonitor()

Best Practices and Patterns

Chain Design Principles

  • Modularity: Build reusable chain components (see the sketch after this list)
  • Error Handling: Implement robust error handling and retries
  • Monitoring: Add comprehensive logging and monitoring
  • Testing: Create unit tests for chain components
  • Documentation: Document chain behavior and expected inputs/outputs
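
A minimal sketch of the modularity principle, assuming the ChatOpenAI and ChatPromptTemplate setup from earlier sections: small, reusable sub-chains composed into a larger workflow.

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-3.5-turbo")

# Reusable component: summarize arbitrary text
summarize = ChatPromptTemplate.from_template("Summarize in two sentences: {text}") | llm | StrOutputParser()

# Reusable component: translate arbitrary text
translate = ChatPromptTemplate.from_template("Translate to French: {text}") | llm | StrOutputParser()

# Compose components into a larger workflow; the lambda adapts one output to the next prompt's input
summarize_then_translate = summarize | (lambda summary: {"text": summary}) | translate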

Performance Optimization

  • Caching: Implement appropriate caching strategies
  • Batching: Use batch processing for multiple requests
  • Streaming: Implement streaming for real-time responses
  • Async: Use async operations for better concurrency (see the sketch after this list)
  • Resource Management: Monitor and manage API rate limits
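
A small concurrency sketch (assumes the chain and batch_inputs defined in earlier sections): LCEL runnables expose async variants such as ainvoke and abatch, which combine naturally with asyncio.

import asyncio

async def run_concurrently(chain, inputs):
    # Fire all requests concurrently instead of one at a time
    return await asyncio.gather(*(chain.ainvoke(i) for i in inputs))

# results = asyncio.run(run_concurrently(chain, batch_inputs))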

Security Considerations

  • Input Validation: Validate and sanitize all inputs (see the sketch after this list)
  • API Key Management: Secure API key storage and rotation
  • Data Privacy: Implement data privacy and retention policies
  • Access Control: Implement proper authentication and authorization
  • Audit Logging: Maintain comprehensive audit logs
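
A hedged sketch of the input-validation point, using Pydantic (already a LangChain dependency) to reject malformed input before it reaches a chain; the model and field constraints here are illustrative assumptions.

from pydantic import BaseModel, Field, ValidationError

class QuestionInput(BaseModel):
    question: str = Field(min_length=3, max_length=2000)

def safe_invoke(chain, raw_input: dict):
    try:
        validated = QuestionInput(**raw_input)
    except ValidationError as e:
        return f"Rejected input: {e}"
    return chain.invoke({"question": validated.question})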

Testing Strategies

import pytest
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

class TestLangChainComponents:
    def setup_method(self):
        self.llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
        self.prompt = ChatPromptTemplate.from_template("Test prompt: \\\\{input\\\\}")
        self.chain = self.prompt|self.llm

    def test_chain_basic_functionality(self):
        result = self.chain.invoke(\\\\{"input": "hello"\\\\})
        assert result is not None
        assert len(result.content) > 0

    def test_chain_with_invalid_input(self):
        with pytest.raises(KeyError):
            self.chain.invoke({"wrong_key": "value"})

    def test_chain_performance(self):
        import time
        start_time = time.time()
        result = self.chain.invoke({"input": "performance test"})
        execution_time = time.time() - start_time
        assert execution_time < 10  # Should complete within 10 seconds

    @pytest.mark.asyncio
    async def test_async_chain(self):
        result = await self.chain.ainvoke({"input": "async test"})
        assert result is not None

Troubleshooting Common Issues

API Rate Limits

from langchain.llms.base import LLM
import time
import random

class RateLimitedLLM(LLM):
    def __init__(self, base_llm, max_requests_per_minute=60):
        self.base_llm = base_llm
        self.max_requests_per_minute = max_requests_per_minute
        self.request_times = []

    def _call(self, prompt, stop=None, run_manager=None):
        self._wait_if_needed()
        return self.base_llm._call(prompt, stop, run_manager)

    def _wait_if_needed(self):
        now = time.time()
        # Remove requests older than 1 minute
        self.request_times = [t for t in self.request_times if now - t < 60]

        if len(self.request_times) >= self.max_requests_per_minute:
            sleep_time = 60 - (now - self.request_times[0]) + random.uniform(1, 3)
            time.sleep(sleep_time)

        self.request_times.append(now)

    @property
    def _llm_type(self):
        return "rate_limited"

Memory Issues

# Memory cleanup utilities
def cleanup_memory(memory_object):
    """Clean up memory to prevent memory leaks"""
    if hasattr(memory_object, 'clear'):
        memory_object.clear()

    # Force garbage collection
    import gc
    gc.collect()

# Memory usage monitoring
import psutil
import os

def monitor_memory_usage():
    process = psutil.Process(os.getpid())
    memory_info = process.memory_info()
    return {
        "rss": memory_info.rss / 1024 / 1024,  # MB
        "vms": memory_info.vms / 1024 / 1024   # MB
    }

Debugging Chain Execution

from langchain.globals import set_debug, set_verbose

# Enable debugging
set_debug(True)
set_verbose(True)

# Custom debug callback
class DebugCallback(BaseCallbackHandler):
    def on_chain_start(self, serialized, inputs, **kwargs):
        print(f"🔗 Chain started: \\\\{serialized.get('name', 'Unknown')\\\\}")
        print(f"📥 Inputs: \\\\{inputs\\\\}")

    def on_llm_start(self, serialized, prompts, **kwargs):
        print(f"🤖 LLM called with prompts:")
        for i, prompt in enumerate(prompts):
            print(f"  Prompt \\\\{i\\\\}: \\\\{prompt[:100]\\\\}...")

    def on_tool_start(self, serialized, input_str, **kwargs):
        print(f"🔧 Tool called: \\\\{serialized.get('name', 'Unknown')\\\\}")
        print(f"📥 Input: \\\\{input_str\\\\}")

debug_chain = chain.with_config(\\\\{"callbacks": [DebugCallback()]\\\\})

This comprehensive LangChain cheat sheet covers everything from basic setup to advanced production patterns. Use these examples and best practices to build robust, scalable LLM applications with LangChain's powerful framework.