BabyAGI Cheat Sheet
Overview
BabyAGI is an AI-powered task management system that uses large language models to autonomously create, prioritize, and execute tasks toward a given objective. Originally created by Yohei Nakajima, it demonstrates the concept of autonomous AI agents that can break down complex goals into subtasks, execute them, and generate new tasks based on results. BabyAGI serves as both a practical tool and a foundational reference for building agentic AI systems.
The system operates in a continuous loop: it executes the highest-priority task, generates new tasks based on results, and re-prioritizes the task queue. It uses vector databases (Chroma, Pinecone, Weaviate) for context storage and retrieval, enabling the agent to maintain memory across task executions.
Installation
git clone https://github.com/yoheinakajima/babyagi.git
cd babyagi
pip install -r requirements.txt
# Copy and configure environment
cp .env.example .env
# Edit .env with your API keys
Environment Setup
# .env
OPENAI_API_KEY=sk-...
OPENAI_API_MODEL=gpt-4o
# Vector store (choose one)
TABLE_NAME=baby-agi-test-table
RESULTS_STORE_NAME=baby-agi-test-results
# Pinecone (optional)
PINECONE_API_KEY=your-key
PINECONE_ENVIRONMENT=us-east-1
# Objective and initial task
OBJECTIVE="Research and summarize the latest trends in AI safety"
INITIAL_TASK="Develop a task list for researching AI safety trends"
# Agent settings
BABY_NAME=BabyAGI
COOPERATIVE_MODE=none # none, local, distributed
Core Usage
Run BabyAGI
# Basic run
python babyagi.py
# With custom objective
OBJECTIVE="Create a marketing plan for a SaaS product" python babyagi.py
# With specific model
OPENAI_API_MODEL=gpt-4o python babyagi.py
How the Loop Works
1. TASK EXECUTION
- Pull highest priority task from queue
- Execute task using LLM with context from vector store
- Store result in vector database
2. TASK CREATION
- Based on execution result and objective
- LLM generates new tasks that are not already in the queue
- New tasks added to queue
3. TASK PRIORITIZATION
- LLM re-ranks all tasks by importance
- Tasks reordered in queue
- Loop returns to step 1
Python API
import os
from collections import deque
from openai import OpenAI
# Client used for all chat-completion calls, built with default settings.
client = OpenAI()

# Goal the whole run works toward.
OBJECTIVE = "Research and summarize quantum computing advances"

# Queue of pending work, seeded with a single starting task.
task_list = deque(
    [
        {
            "task_id": 1,
            "task_name": "List key quantum computing breakthroughs in 2024-2025",
        }
    ]
)
def execution_agent(objective, task):
    """Run a single task through the chat model and return its reply.

    Args:
        objective: Overall goal, included in the prompt for context.
        task: Description of the task to carry out.

    Returns:
        The message content of the first choice in the model's response.
    """
    prompt = f"Objective: {objective}\nTask: {task}\nExecute this task and provide a detailed result."
    conversation = [
        {"role": "system", "content": "You are an AI task execution agent."},
        {"role": "user", "content": prompt},
    ]
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=conversation,
        max_tokens=2000,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def task_creation_agent(objective, result, task_description, task_list):
    """Ask the model for follow-up tasks based on the latest result.

    Args:
        objective: Overall goal the tasks should work toward.
        result: Output of the task that just finished.
        task_description: Name of the task that just finished.
        task_list: Iterable of pending task dicts; each must have a
            ``"task_name"`` key. The names are shown to the model as the
            existing tasks.

    Returns:
        A list of ``{"task_name": ...}`` dicts, one per non-empty line of
        the model's reply. No ``"task_id"`` is assigned here.
    """
    import re  # function-scope import keeps this snippet self-contained

    existing_names = [t["task_name"] for t in task_list]
    prompt = (
        "\n"
        f"Objective: {objective}\n"
        f"Last completed task: {task_description}\n"
        f"Result: {result}\n"
        f"Existing tasks: {existing_names}\n"
        "Create new tasks needed to reach the objective. Return as numbered list.\n"
    )
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a task creation AI."},
            {"role": "user", "content": prompt},
        ],
    )
    # The message content can be None; treat that as an empty reply
    # instead of failing on None.strip().
    reply = response.choices[0].message.content or ""

    new_tasks = []
    for line in reply.strip().split("\n"):
        # Remove only a leading "1." / "1)" list marker. Stripping digits and
        # dots from both ends would truncate names ending in a number,
        # e.g. "... breakthroughs in 2024-2025".
        name = re.sub(r"^\s*\d+[.)](?:\s+|$)", "", line).strip()
        if name:
            new_tasks.append({"task_name": name})
    return new_tasks
def prioritization_agent(objective, task_list):
    """Ask the model to re-rank the pending tasks and renumber them.

    Args:
        objective: Overall goal used as the ranking criterion in the prompt.
        task_list: Iterable of task dicts; each must have a ``"task_name"``
            key.

    Returns:
        A list of ``{"task_id": ..., "task_name": ...}`` dicts in the order
        the model returned them, with ``task_id`` numbered consecutively
        from 1. An empty ``task_list`` yields ``[]`` without calling the
        model.
    """
    import re  # function-scope import keeps this snippet self-contained

    task_names = [t["task_name"] for t in task_list]
    if not task_names:
        # Nothing to rank: skip the API call entirely.
        return []

    prompt = (
        "\n"
        f"Objective: {objective}\n"
        f"Tasks: {task_names}\n"
        "Reprioritize these tasks from most to least important. Return as numbered list.\n"
    )
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a task prioritization AI."},
            {"role": "user", "content": prompt},
        ],
    )
    # The message content can be None; treat that as an empty reply.
    reply = response.choices[0].message.content or ""

    ranked_names = []
    for line in reply.strip().split("\n"):
        # Remove only a leading "1." / "1)" list marker so that names ending
        # in digits (e.g. "... in 2024-2025") are not truncated.
        name = re.sub(r"^\s*\d+[.)](?:\s+|$)", "", line).strip()
        if name:
            ranked_names.append(name)

    # Number after filtering so blank lines in the reply leave no id gaps.
    return [
        {"task_id": position, "task_name": name}
        for position, name in enumerate(ranked_names, start=1)
    ]
# Main loop: at most five execute -> create -> prioritize rounds.
for iteration in range(5):
    # Nothing left to do: stop early.
    if len(task_list) == 0:
        break

    task = task_list.popleft()
    print(f"\n--- Executing Task {task['task_id']}: {task['task_name']} ---")

    result = execution_agent(OBJECTIVE, task["task_name"])
    print(f"Result: {result[:200]}...")

    # Queue the follow-up tasks suggested by the model.
    new_tasks = task_creation_agent(OBJECTIVE, result, task["task_name"], list(task_list))
    task_list.extend(new_tasks)

    # Re-rank whatever is pending and replace the queue with the new order.
    if task_list:
        prioritized = prioritization_agent(OBJECTIVE, list(task_list))
        task_list = deque(prioritized)

print(f"\nRemaining tasks: {len(task_list)}")
Configuration
Vector Store Options
# Chroma (default, local)
# No additional config needed - uses in-memory or local persistence
# Pinecone
PINECONE_API_KEY=your-key
PINECONE_ENVIRONMENT=us-east-1
# Weaviate
WEAVIATE_URL=http://localhost:8080
WEAVIATE_API_KEY=your-key
Model Configuration
# OpenAI models
OPENAI_API_MODEL=gpt-4o # Best quality
OPENAI_API_MODEL=gpt-4o-mini # Faster, cheaper
# Temperature
OPENAI_TEMPERATURE=0.5 # 0=deterministic, 1=creative
Cooperative Mode
# Single agent (default)
COOPERATIVE_MODE=none
# Multiple agents sharing task queue
COOPERATIVE_MODE=local
# Distributed agents across machines
COOPERATIVE_MODE=distributed
Advanced Usage
BabyAGI with Tools
import requests
from openai import OpenAI
client = OpenAI()
def _web_search(query):
    """Query the search endpoint and return the decoded JSON body."""
    # Passing the query via ``params`` URL-encodes it, and the timeout keeps
    # a stalled request from hanging the agent.
    response = requests.get("https://api.search.com", params={"q": query}, timeout=30)
    return response.json()


def _save_file(name, content):
    """Write *content* to the file *name*; return the characters written."""
    # ``with`` closes the handle even if the write fails.
    with open(name, "w") as handle:
        return handle.write(content)


def _read_file(name):
    """Return the full text of the file *name*."""
    with open(name) as handle:
        return handle.read()


# Tool registry: maps the name the model may reference to its implementation.
tools = {
    "web_search": _web_search,
    "save_file": _save_file,
    "read_file": _read_file,
}
def execute_with_tools(objective, task):
    """Run a task through the model, then execute any tool calls it emitted.

    The model is told to request tools as ``TOOL[tool_name](args)``. Each
    such call found in the reply whose name is a key of ``tools`` is run,
    and the first 200 characters of its result are appended to the reply.

    Args:
        objective: Overall goal, included in the prompt.
        task: Description of the task to carry out.

    Returns:
        The model's reply text followed by one bracketed line per executed
        (or unparseable) tool call.
    """
    import ast
    import re

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": (
                    f"You are an AI agent with tools: {list(tools.keys())}\n"
                    "Use tool calls when needed. Format: TOOL[tool_name](args)"
                ),
            },
            {"role": "user", "content": f"Objective: {objective}\nTask: {task}"},
        ],
    )
    # The message content can be None; fall back to an empty string so the
    # regex scan below does not fail.
    result = response.choices[0].message.content or ""

    # Parse and execute tool calls.
    # NOTE: the non-greedy "(.*?)\)" stops at the first ")", so arguments
    # that themselves contain ")" are cut short.
    tool_calls = re.findall(r'TOOL\[(\w+)\]\((.*?)\)', result)
    for tool_name, args in tool_calls:
        if tool_name not in tools:
            continue
        try:
            # SECURITY: the argument text comes from the model, so it must not
            # be passed to eval(). literal_eval accepts only Python literals
            # (strings, numbers, tuples, ...) and cannot run code.
            call_args = ast.literal_eval(f"({args},)") if args.strip() else ()
        except (ValueError, SyntaxError, TypeError) as exc:
            result += f"\n[Tool {tool_name} error: could not parse arguments ({exc})]"
            continue
        tool_result = tools[tool_name](*call_args)
        result += f"\n[Tool {tool_name} result: {str(tool_result)[:200]}]"
    return result
With Memory (Vector Store)
import chromadb
# Chroma client with default settings.
chroma_client = chromadb.Client()
# get_or_create_collection reuses an existing "babyagi_memory" collection;
# create_collection raises if the name already exists (for example when this
# snippet is re-run in the same process or against a persistent store).
collection = chroma_client.get_or_create_collection("babyagi_memory")
def store_result(task_id, task_name, result):
    """Save a task's result text in the memory collection.

    The text is stored under the id ``task_<task_id>`` with the task name
    attached as metadata.

    Args:
        task_id: Identifier used to build the record id.
        task_name: Name of the task, stored as metadata.
        result: Result text to store as the document.
    """
    record_id = f"task_{task_id}"
    metadata = {"task_name": task_name}
    collection.add(documents=[result], metadatas=[metadata], ids=[record_id])
def get_relevant_context(query, n_results=5):
    """Look up stored results related to *query*.

    Args:
        query: Text to search the memory collection with.
        n_results: Number of matches to request.

    Returns:
        The matched documents joined with newlines, or ``""`` when the
        query returns no documents.
    """
    hits = collection.query(query_texts=[query], n_results=n_results)
    # Only one query text is sent, so its matches are the first entry.
    matched_docs = hits["documents"][0]
    if not matched_docs:
        return ""
    return "\n".join(matched_docs)
def execution_agent_with_memory(objective, task):
    """Execute a task, giving the model context from earlier results.

    Args:
        objective: Overall goal, included in the prompt.
        task: Description of the task; also used as the memory query.

    Returns:
        The message content of the first choice in the model's response.
    """
    context = get_relevant_context(task)
    prompt = (
        "\n"
        f"Objective: {objective}\n"
        f"Task: {task}\n"
        f"Relevant context from previous tasks: {context}\n"
        "Execute this task."
    )
    conversation = [
        {"role": "system", "content": "You are an AI agent."},
        {"role": "user", "content": prompt},
    ]
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=conversation,
    )
    return completion.choices[0].message.content
Rate Limiting and Safety
import time
# Hard cap on loop rounds and the pause inserted after each task.
MAX_ITERATIONS = 20
DELAY_BETWEEN_TASKS = 2  # seconds

for iteration in range(MAX_ITERATIONS):
    if len(task_list) == 0:
        print("All tasks completed!")
        break

    task = task_list.popleft()
    # NOTE(review): in this snippet the result is not used and no new tasks
    # are enqueued, so the queue only shrinks here - confirm whether task
    # creation/prioritization is meant to run in this loop as well.
    result = execution_agent(OBJECTIVE, task["task_name"])

    # Safety check: after the first rounds, stop if the queue has ballooned.
    if iteration > 10:
        if len(task_list) > 50:
            print("Task list growing too large, stopping.")
            break

    # Space out API calls.
    time.sleep(DELAY_BETWEEN_TASKS)
Troubleshooting
| Issue | Solution |
|---|---|
| API rate limits | Add delays between iterations, use gpt-4o-mini |
| Tasks keep growing | Set MAX_ITERATIONS limit, use stricter creation prompts |
| Repetitive tasks | Improve prioritization prompt, add deduplication |
| Context lost between runs | Use persistent vector store (Chroma file, Pinecone) |
| High API costs | Use cheaper models for creation/prioritization |
| Agent stuck in loop | Add iteration counter and break conditions |
| Vague task results | Improve execution prompt with specific output format |
| Memory overflow | Limit vector store size, prune old entries |
# Check environment
python -c "import openai; print('OpenAI SDK ready')"
python -c "import chromadb; print('Chroma ready')"
# Dry run (single iteration)
MAX_ITERATIONS=1 python babyagi.py