BabyAGI Cheat Sheet

Overview

BabyAGI is an AI-powered task management system that uses large language models to autonomously create, prioritize, and execute tasks toward a given objective. Originally created by Yohei Nakajima, it demonstrates the concept of autonomous AI agents that can break down complex goals into subtasks, execute them, and generate new tasks based on results. BabyAGI serves as both a practical tool and a foundational reference for building agentic AI systems.

The system operates in a continuous loop: it executes the highest-priority task, generates new tasks based on the result, and re-prioritizes the task queue. It uses a vector database (Chroma, Pinecone, or Weaviate) for context storage and retrieval, enabling the agent to maintain memory across task executions.

Installation

git clone https://github.com/yoheinakajima/babyagi.git
cd babyagi
pip install -r requirements.txt

# Copy and configure environment
cp .env.example .env
# Edit .env with your API keys

Environment Setup

# .env
OPENAI_API_KEY=sk-...
OPENAI_API_MODEL=gpt-4o

# Vector store (choose one)
TABLE_NAME=baby-agi-test-table
RESULTS_STORE_NAME=baby-agi-test-results

# Pinecone (optional)
PINECONE_API_KEY=your-key
PINECONE_ENVIRONMENT=us-east-1

# Objective and initial task
OBJECTIVE="Research and summarize the latest trends in AI safety"
INITIAL_TASK="Develop a task list for researching AI safety trends"

# Agent settings
BABY_NAME=BabyAGI
COOPERATIVE_MODE=none  # none, local, distributed

Core Usage

Run BabyAGI

# Basic run
python babyagi.py

# With custom objective
OBJECTIVE="Create a marketing plan for a SaaS product" python babyagi.py

# With specific model
OPENAI_API_MODEL=gpt-4o python babyagi.py

How the Loop Works

1. TASK EXECUTION
   - Pull highest priority task from queue
   - Execute task using LLM with context from vector store
   - Store result in vector database

2. TASK CREATION
   - Based on execution result and objective
   - LLM generates new tasks that are not already in the queue
   - New tasks added to queue

3. TASK PRIORITIZATION
   - LLM re-ranks all tasks by importance
   - Tasks reordered in queue
   - Loop returns to step 1

Python API

import os
from collections import deque
from openai import OpenAI

# Shared OpenAI client used by the agent functions in this example.
# NOTE(review): presumably picks up OPENAI_API_KEY from the environment — confirm.
client = OpenAI()

# The overall goal that every agent prompt is anchored to.
OBJECTIVE = "Research and summarize quantum computing advances"

# Task queue: a deque of {"task_id", "task_name"} dicts, seeded with one
# starting task and consumed from the left by the main loop.
task_list = deque()
task_list.append({"task_id": 1, "task_name": "List key quantum computing breakthroughs in 2024-2025"})

def execution_agent(objective, task):
    """Ask the chat model to carry out one task and return its answer text.

    Args:
        objective: The overall goal, included in the prompt for context.
        task: Description of the task to execute.

    Returns:
        The message content of the first completion choice.
    """
    system_message = {"role": "system", "content": "You are an AI task execution agent."}
    user_message = {
        "role": "user",
        "content": f"Objective: {objective}\nTask: {task}\nExecute this task and provide a detailed result.",
    }
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[system_message, user_message],
        max_tokens=2000,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

def task_creation_agent(objective, result, task_description, task_list):
    """Ask the LLM for follow-up tasks and parse its numbered-list reply.

    Args:
        objective: The overall goal the tasks should work toward.
        result: Output of the task that was just executed.
        task_description: Name of the task that produced ``result``.
        task_list: Pending task dicts (each with a ``"task_name"`` key);
            their names are included in the prompt as the existing tasks.

    Returns:
        A list of ``{"task_name": str}`` dicts, one per non-empty line of
        the reply. No ``"task_id"`` is assigned here.
    """
    import re  # local import so the snippet stays self-contained

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a task creation AI."},
            {"role": "user", "content": f"""
Objective: {objective}
Last completed task: {task_description}
Result: {result}
Existing tasks: {[t['task_name'] for t in task_list]}

Create new tasks needed to reach the objective. Return as numbered list.
"""}
        ]
    )
    reply_lines = response.choices[0].message.content.strip().split("\n")

    parsed = []
    for line in reply_lines:
        # Remove only a leading "12." / "12)" list marker. Stripping digits and
        # periods from both ends would also eat meaningful trailing text such
        # as the year in "Review papers from 2025".
        name = re.sub(r"^\s*\d+[.)](?:\s+|$)", "", line).strip()
        if name:  # skip blank lines and bare markers like "3."
            parsed.append({"task_name": name})
    return parsed

def prioritization_agent(objective, task_list):
    """Ask the LLM to reorder the pending tasks and parse its numbered list.

    Args:
        objective: The overall goal used to judge importance.
        task_list: Pending task dicts, each with a ``"task_name"`` key.

    Returns:
        A list of ``{"task_id": int, "task_name": str}`` dicts in the order
        returned by the model. Ids are consecutive, starting at 1.
    """
    import re  # local import so the snippet stays self-contained

    task_names = [t["task_name"] for t in task_list]
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a task prioritization AI."},
            {"role": "user", "content": f"""
Objective: {objective}
Tasks: {task_names}

Reprioritize these tasks from most to least important. Return as numbered list.
"""}
        ]
    )
    reply_lines = response.choices[0].message.content.strip().split("\n")

    # Remove only a leading "12." / "12)" list marker. Stripping digits and
    # periods from both ends would also eat meaningful trailing text such as
    # the years in "List breakthroughs in 2024-2025".
    cleaned = (re.sub(r"^\s*\d+[.)](?:\s+|$)", "", line).strip() for line in reply_lines)
    names = [name for name in cleaned if name]

    # Number after filtering so blank lines in the reply do not leave id gaps.
    return [{"task_id": position, "task_name": name} for position, name in enumerate(names, start=1)]

# Main loop: run at most five execute -> create -> prioritize cycles.
for iteration in range(5):
    # An empty queue means there is nothing left to work on.
    if not task_list:
        break

    # Step 1: take the task at the front of the queue and execute it.
    task = task_list.popleft()
    print(f"\n--- Executing Task {task['task_id']}: {task['task_name']} ---")

    result = execution_agent(OBJECTIVE, task["task_name"])
    print(f"Result: {result[:200]}...")

    # Step 2: ask for follow-up tasks (given a snapshot of the pending queue)
    # and put them at the back of the queue.
    new_tasks = task_creation_agent(OBJECTIVE, result, task["task_name"], list(task_list))
    task_list.extend(new_tasks)

    # Step 3: let the model reorder whatever is pending; the returned list
    # replaces the queue wholesale.
    if task_list:
        prioritized = prioritization_agent(OBJECTIVE, list(task_list))
        task_list = deque(prioritized)

    print(f"\nRemaining tasks: {len(task_list)}")

Configuration

Vector Store Options

# Chroma (default, local)
# No additional config needed - uses in-memory or local persistence

# Pinecone
PINECONE_API_KEY=your-key
PINECONE_ENVIRONMENT=us-east-1

# Weaviate
WEAVIATE_URL=http://localhost:8080
WEAVIATE_API_KEY=your-key

Model Configuration

# OpenAI models (set exactly one)
OPENAI_API_MODEL=gpt-4o          # Best quality
OPENAI_API_MODEL=gpt-4o-mini     # Faster, cheaper

# Temperature
OPENAI_TEMPERATURE=0.5           # 0 = most deterministic, 1 = more creative

Cooperative Mode

# Single agent (default)
COOPERATIVE_MODE=none

# Multiple agents sharing task queue
COOPERATIVE_MODE=local

# Distributed agents across machines
COOPERATIVE_MODE=distributed

Advanced Usage

BabyAGI with Tools

import requests
from openai import OpenAI

client = OpenAI()

def _web_search(query):
    """Query the search endpoint and return the decoded JSON response."""
    # Passing the query through ``params`` lets requests URL-encode it, so
    # spaces, "&" and "#" in the query cannot corrupt the URL. The timeout
    # keeps an unresponsive server from hanging the agent.
    response = requests.get("https://api.search.com", params={"q": query}, timeout=10)
    return response.json()


def _save_file(name, content):
    """Write ``content`` to ``name`` and return the number of characters written."""
    # The context manager closes (and flushes) the file even if write() fails.
    with open(name, "w") as handle:
        return handle.write(content)


def _read_file(name):
    """Return the full text content of the file ``name``."""
    with open(name) as handle:
        return handle.read()


# Registry of tool name -> callable, looked up by name when the model asks
# for a tool call.
tools = {
    "web_search": _web_search,
    "save_file": _save_file,
    "read_file": _read_file,
}

def execute_with_tools(objective, task):
    """Run a task through the LLM and execute any tool calls in its reply.

    The model is told to request tools as ``TOOL[tool_name](args)``. Each such
    call found in the reply is executed and a truncated result is appended to
    the returned text.

    Args:
        objective: The overall goal, included in the prompt.
        task: Description of the task to execute.

    Returns:
        The model's reply, followed by one ``[Tool ...]`` line per recognised
        tool call (either its result or a note that its arguments could not
        be parsed).
    """
    import ast
    import re

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": f"""You are an AI agent with tools: {list(tools.keys())}
            Use tool calls when needed. Format: TOOL[tool_name](args)"""},
            {"role": "user", "content": f"Objective: {objective}\nTask: {task}"}
        ]
    )
    result = response.choices[0].message.content

    # Parse and execute tool calls.
    # Known limitation: the non-greedy "(.*?)\)" stops at the first ")", so
    # arguments that themselves contain ")" are truncated.
    tool_calls = re.findall(r'TOOL\[(\w+)\]\((.*?)\)', result)
    for tool_name, args in tool_calls:
        if tool_name not in tools:
            continue  # unknown tool names are ignored

        # SECURITY: the argument text is model output, i.e. untrusted input.
        # It used to be passed to eval(), which would run arbitrary Python.
        # ast.literal_eval only accepts literals (strings, numbers, tuples,
        # lists, dicts, ...), so nothing in the reply can execute code.
        try:
            parsed_args = ast.literal_eval(f"({args},)") if args.strip() else ()
        except (ValueError, TypeError, SyntaxError) as exc:
            # Malformed arguments are reported in the result rather than
            # aborting the whole task.
            result += f"\n[Tool {tool_name} skipped: could not parse arguments ({exc})]"
            continue

        tool_result = tools[tool_name](*parsed_args)
        result += f"\n[Tool {tool_name} result: {str(tool_result)[:200]}]"

    return result

With Memory (Vector Store)

import chromadb

# Client for the Chroma vector store.
# NOTE(review): chromadb.Client() is presumably the in-memory client, so stored
# results would not survive a restart — confirm.
chroma_client = chromadb.Client()
# Collection used as the agent's memory of task results.
# NOTE(review): create_collection may raise if "babyagi_memory" already exists
# (e.g. when re-running in the same session); get_or_create_collection may be
# the safer call — confirm against the Chroma docs.
collection = chroma_client.create_collection("babyagi_memory")

def store_result(task_id, task_name, result):
    """Save one task result to the memory collection.

    Args:
        task_id: Identifier of the task; the stored id is ``task_<task_id>``.
        task_name: Task name, stored as metadata alongside the document.
        result: Result text, stored as the document body.
    """
    metadata = {"task_name": task_name}
    document_id = f"task_{task_id}"
    collection.add(documents=[result], metadatas=[metadata], ids=[document_id])

def get_relevant_context(query, n_results=5):
    """Look up stored results related to ``query`` and join them into one string.

    Args:
        query: Text to search the memory collection with.
        n_results: Number of results to request from the collection.

    Returns:
        The documents returned for the query, separated by newlines, or an
        empty string when there are none.
    """
    hits = collection.query(query_texts=[query], n_results=n_results)
    # Only one query text is sent, so only the first result list is used.
    matched_documents = hits["documents"][0]
    if not matched_documents:
        return ""
    return "\n".join(matched_documents)

def execution_agent_with_memory(objective, task):
    """Execute a task with the LLM, adding context retrieved from memory.

    Args:
        objective: The overall goal, included in the prompt.
        task: Description of the task; also used as the memory search query.

    Returns:
        The message content of the first completion choice.
    """
    # Fetch related earlier results first so they can be embedded in the prompt.
    context = get_relevant_context(task)
    prompt = f"""
Objective: {objective}
Task: {task}
Relevant context from previous tasks: {context}

Execute this task."""
    conversation = [
        {"role": "system", "content": "You are an AI agent."},
        {"role": "user", "content": prompt},
    ]
    completion = client.chat.completions.create(model="gpt-4o", messages=conversation)
    return completion.choices[0].message.content

Rate Limiting and Safety

import time

# Hard cap on the number of tasks executed in one run.
MAX_ITERATIONS = 20
# Pause after each executed task, in seconds.
DELAY_BETWEEN_TASKS = 2

# Only task execution is shown in this loop; task creation and
# prioritization are not called here.
for iteration in range(MAX_ITERATIONS):
    if not task_list:
        print("All tasks completed!")
        break

    task = task_list.popleft()
    result = execution_agent(OBJECTIVE, task["task_name"])

    # Safety check: once past the first iterations, bail out if the queue has
    # ballooned, since it is then growing faster than it is being worked off.
    if iteration > 10:
        if len(task_list) > 50:
            print("Task list growing too large, stopping.")
            break

    # Throttle before moving on to the next task.
    time.sleep(DELAY_BETWEEN_TASKS)

Troubleshooting

Issue	Solution
API rate limits	Add delays between iterations, use gpt-4o-mini
Tasks keep growing	Set MAX_ITERATIONS limit, use stricter creation prompts
Repetitive tasks	Improve prioritization prompt, add deduplication
Context lost between runs	Use persistent vector store (Chroma file, Pinecone)
High API costs	Use cheaper models for creation/prioritization
Agent stuck in loop	Add iteration counter and break conditions
Vague task results	Improve execution prompt with specific output format
Memory overflow	Limit vector store size, prune old entries

# Check environment
python -c "import openai; print('OpenAI SDK ready')"
python -c "import chromadb; print('Chroma ready')"

# Quick smoke test (single iteration; this still runs a real task)
MAX_ITERATIONS=1 python babyagi.py