Pinecone Cheat Sheet

Overview

Pinecone is a fully managed vector database designed for AI applications requiring fast and scalable similarity search. It handles infrastructure management, scaling, and optimization automatically, allowing developers to focus on building AI features. Pinecone supports serverless indexes (pay-per-query) and pod-based indexes (dedicated compute), with features like metadata filtering, namespace isolation, sparse-dense hybrid search, and real-time upserts.

The platform is commonly used for RAG systems, recommendation engines, semantic search, and anomaly detection. Pinecone integrates natively with LangChain, LlamaIndex, Haystack, and other AI frameworks. It offers a generous free tier for prototyping and scales to billions of vectors in production.

Installation

pip install pinecone

# Legacy package name used by older SDK releases (deprecated; do not install alongside `pinecone`)
pip install pinecone-client

# JavaScript
npm install @pinecone-database/pinecone

Core Operations

Initialize and Create Index

from pinecone import Pinecone, ServerlessSpec, PodSpec

pc = Pinecone(api_key="YOUR_API_KEY")

# Serverless index: only a cloud provider and region are specified
serverless_spec = ServerlessSpec(cloud="aws", region="us-east-1")
pc.create_index(name="documents", dimension=1536, metric="cosine", spec=serverless_spec)

# Pod-based index: dedicated compute with an explicit pod type and count
pod_spec = PodSpec(environment="us-east-1-aws", pod_type="p1.x1", pods=1, replicas=1)
pc.create_index(name="documents-pod", dimension=1536, metric="cosine", spec=pod_spec)

# Wait for index to be ready (bounded, so a failed creation cannot hang forever)
import time

deadline = time.monotonic() + 300  # give up after 5 minutes
while not pc.describe_index("documents").status["ready"]:
    if time.monotonic() > deadline:
        raise TimeoutError("Index 'documents' was not ready within 300 seconds")
    time.sleep(1)

Connect to Index

index = pc.Index("documents")

# Report the total vector count and the per-namespace breakdown
index_stats = index.describe_index_stats()
print(f"Total vectors: {index_stats.total_vector_count}")
print(f"Namespaces: {index_stats.namespaces}")

Upsert Vectors

# Single upsert call: one request carrying a small list of records.
# Each record has an id, its vector values (`...` stands for the remaining
# components) and free-form metadata.
sample_vectors = [
    {"id": "doc-1", "values": [0.1, 0.2, ...], "metadata": {"text": "RAG overview", "source": "wiki"}},
    {"id": "doc-2", "values": [0.3, 0.4, ...], "metadata": {"text": "Vector search", "source": "docs"}},
]
index.upsert(vectors=sample_vectors, namespace="main")

# Batch upsert (recommended for large datasets)
from itertools import islice

def chunked(iterable, batch_size=100):
    """Yield successive lists of at most ``batch_size`` items from ``iterable``.

    Args:
        iterable: Any iterable; it is consumed lazily, one batch at a time.
        batch_size: Maximum number of items per batch; must be at least 1.

    Yields:
        Lists of items in their original order; only the last one may be
        shorter than ``batch_size``.

    Raises:
        ValueError: If ``batch_size`` is less than 1 (raised when iteration
            starts, because this is a generator).
    """
    if batch_size < 1:
        # islice(it, 0) is always empty, so the loop below would stop at once
        # and silently drop every item instead of reporting the bad argument.
        raise ValueError("batch_size must be at least 1")
    it = iter(iterable)
    while batch := list(islice(it, batch_size)):
        yield batch

# Build one record per text; embedding_list[position] is the vector for that text
vectors = [
    {"id": f"doc-{position}", "values": embedding_list[position], "metadata": {"text": text}}
    for position, text in enumerate(texts)
]

# Send the records in batches of 100, one upsert request per batch
batches = chunked(vectors, batch_size=100)
for batch in batches:
    index.upsert(vectors=batch, namespace="main")

Query

# Basic query
results = index.query(
    vector=[0.1, 0.2, ...],
    top_k=10,
    include_metadata=True,
    namespace="main"
)

for match in results.matches:
    print(f"ID: {match.id}, Score: {match.score:.4f}, Text: {match.metadata.get('text')}")

# Query with metadata filter
# A query searches a single namespace. The vectors in this sheet were upserted
# into "main", so the namespace must be passed here too; omitting it searches
# the default namespace and returns no matches.
results = index.query(
    vector=query_embedding,
    top_k=5,
    filter={
        "source": {"$eq": "wiki"},
        "timestamp": {"$gt": 1700000000}
    },
    include_metadata=True,
    namespace="main"
)

# Query by ID (fetch similar to existing vector)
# "doc-1" lives in the "main" namespace, so the lookup must target it as well.
results = index.query(
    id="doc-1",
    top_k=10,
    include_metadata=True,
    namespace="main"
)

Metadata Filtering

Operator	Description	Example
`$eq`	Equal to	`{"source": {"$eq": "wiki"}}`
`$ne`	Not equal	`{"source": {"$ne": "draft"}}`
`$gt`	Greater than	`{"score": {"$gt": 0.8}}`
`$gte`	Greater or equal	`{"year": {"$gte": 2024}}`
`$lt`	Less than	`{"price": {"$lt": 100}}`
`$lte`	Less or equal	`{"count": {"$lte": 50}}`
`$in`	In array	`{"tag": {"$in": ["ai", "ml"]}}`
`$nin`	Not in array	`{"status": {"$nin": ["deleted"]}}`
`$and`	Logical AND	`{"$and": [{"a": 1}, {"b": 2}]}`
`$or`	Logical OR	`{"$or": [{"a": 1}, {"b": 2}]}`

Fetch and Delete

# Fetch vectors by ID (the IDs and namespace match the upsert examples: "doc-1"/"doc-2" in "main")
result = index.fetch(ids=["doc-1", "doc-2"], namespace="main")

# Delete by ID
index.delete(ids=["doc-1", "doc-2"], namespace="main")

# Delete by filter (uses the same `$eq` operator form as the query filters)
# NOTE(review): delete-by-filter has not always been available on serverless
# indexes — confirm support for your index type and SDK version.
index.delete(
    filter={"source": {"$eq": "deprecated"}},
    namespace="main"
)

# Delete entire namespace (removes every vector stored under "old_data")
index.delete(delete_all=True, namespace="old_data")

Sparse-Dense Hybrid Search

# NOTE: Pinecone requires the dotproduct metric for sparse-dense vectors, so run
# this against an index created with metric="dotproduct" (not a cosine index).

# Upsert with sparse values
index.upsert(
    vectors=[{
        "id": "doc-1",
        "values": dense_embedding,       # Dense vector
        "sparse_values": {
            "indices": [102, 2048, 5000],  # Non-zero dimensions
            "values": [0.5, 0.3, 0.8]     # Corresponding values
        },
        "metadata": {"text": "hybrid search example"}
    }],
    namespace="hybrid"
)

# Hybrid query
# The namespace must match the one used for the upsert above; without it the
# query searches the default namespace and finds nothing.
results = index.query(
    vector=dense_query,
    sparse_vector={
        "indices": [102, 5000],
        "values": [0.6, 0.4]
    },
    top_k=10,
    include_metadata=True,
    namespace="hybrid"
)

Configuration

Index Management

# List indexes: one summary line per index
for idx in pc.list_indexes():
    print(f"{idx.name}: {idx.dimension}d, {idx.metric}, {idx.status}")

# Describe index: host, readiness status and spec of a single index
desc = pc.describe_index("documents")
for label, value in (("Host", desc.host), ("Status", desc.status), ("Spec", desc.spec)):
    print(f"{label}: {value}")

# Configure index (pod-based only): change the replica count and pod size
pc.configure_index(name="documents-pod", replicas=2, pod_type="p1.x2")

# Delete index
pc.delete_index("documents")

Collections (Backups)

# Create collection from index
# Collections are static copies of pod-based indexes, so the source is the
# pod-based index rather than the serverless one.
pc.create_collection(name="docs-backup", source="documents-pod")

# List collections
collections = pc.list_collections()

# Create index from collection
# `source_collection` is a PodSpec option, not a create_index() argument.
# The dimension must match the index the collection was created from.
pc.create_index(
    name="documents-restored",
    dimension=1536,
    metric="cosine",
    spec=PodSpec(
        environment="us-east-1-aws",
        pod_type="p1.x1",
        source_collection="docs-backup"
    )
)

# Delete collection
pc.delete_collection("docs-backup")

Advanced Usage

Pinecone Assistant (RAG)

# NOTE(review): `pc.assistant` presumably requires the separate assistant
# plugin package (pinecone-plugin-assistant) — confirm it is installed.

# Create an assistant
assistant = pc.assistant.create_assistant(
    assistant_name="doc-helper",
    instructions="Answer questions based on uploaded documents."
)

# Upload files (one call per file)
assistant.upload_file(file_path="manual.pdf")
assistant.upload_file(file_path="guide.txt")

# Chat with assistant: messages are passed as a list of role/content dicts
response = assistant.chat(messages=[
    {"role": "user", "content": "How do I configure authentication?"}
])
print(response.message.content)

Integration with LangChain

from langchain_pinecone import PineconeVectorStore
from langchain_openai import OpenAIEmbeddings

# Both clients read their credentials from the environment
# (OPENAI_API_KEY and PINECONE_API_KEY).
# text-embedding-3-small produces 1536-dimensional vectors, matching the
# dimension of the "documents" index.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

vectorstore = PineconeVectorStore(
    index_name="documents",
    embedding=embeddings,
    namespace="langchain"
)

# Add documents
# `Document` lives in langchain_core, which is installed with langchain_pinecone;
# the old `langchain.schema` path is deprecated and needs the separate
# `langchain` package.
from langchain_core.documents import Document
docs = [Document(page_content="RAG is...", metadata={"source": "wiki"})]
vectorstore.add_documents(docs)

# Search
results = vectorstore.similarity_search("What is RAG?", k=5)

REST API

# Query via REST
curl -X POST "https://documents-abc123.svc.us-east-1-aws.pinecone.io/query" \
  -H "Api-Key: YOUR_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "vector": [0.1, 0.2, 0.3],
    "topK": 10,
    "includeMetadata": true,
    "namespace": "main"
  }'

# Upsert via REST
curl -X POST "https://documents-abc123.svc.us-east-1-aws.pinecone.io/vectors/upsert" \
  -H "Api-Key: YOUR_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "vectors": [
      {"id": "doc-1", "values": [0.1, 0.2], "metadata": {"text": "hello"}}
    ],
    "namespace": "main"
  }'

Troubleshooting

Issue	Solution
Index not ready	Wait for creation (can take minutes), check `describe_index`
Upsert timeout	Reduce batch size to 100, use async upserts
Query returns no results	Check namespace, verify vectors exist with `describe_index_stats`
Dimension mismatch	Ensure vector dim matches index dim exactly
Rate limit exceeded	Implement exponential backoff, upgrade plan
Metadata too large	Keep metadata under 40KB per vector
High latency	Use serverless in same region as your app
Filter not working	Check metadata key exists and value type matches

# Check API key
curl -H "Api-Key: YOUR_KEY" https://api.pinecone.io/indexes

# View index stats
curl -X POST "https://INDEX_HOST/describe_index_stats" \
  -H "Api-Key: YOUR_KEY" \
  -H "Content-Type: application/json" \
  -d '{}'