Pinecone Cheat Sheet
Overview
Pinecone is a fully managed vector database designed for AI applications requiring fast and scalable similarity search. It handles infrastructure management, scaling, and optimization automatically, allowing developers to focus on building AI features. Pinecone supports serverless indexes (usage-based pricing) and pod-based indexes (dedicated compute), with features like metadata filtering, namespace isolation, sparse-dense hybrid search, and real-time upserts.
The platform is commonly used for RAG systems, recommendation engines, semantic search, and anomaly detection. Pinecone integrates natively with LangChain, LlamaIndex, Haystack, and other AI frameworks. It offers a generous free tier for prototyping and scales to billions of vectors in production.
Installation
pip install pinecone
# Legacy package name (older SDK releases; since renamed to `pinecone`)
pip install pinecone-client
# JavaScript
npm install @pinecone-database/pinecone
Core Operations
Initialize and Create Index
from pinecone import Pinecone, ServerlessSpec, PodSpec
# One client object is the entry point for every call in this sheet.
pc = Pinecone(api_key="YOUR_API_KEY")

# Create serverless index
# The spec only names the cloud provider and region.
serverless_spec = ServerlessSpec(cloud="aws", region="us-east-1")
pc.create_index(
    name="documents",
    dimension=1536,  # must equal the length of every vector you upsert
    metric="cosine",
    spec=serverless_spec,
)

# Create pod-based index
# The spec names the environment, the pod type, and the pod/replica counts.
pod_spec = PodSpec(
    environment="us-east-1-aws",
    pod_type="p1.x1",
    pods=1,
    replicas=1,
)
pc.create_index(
    name="documents-pod",
    dimension=1536,
    metric="cosine",
    spec=pod_spec,
)
# Wait for index to be ready
import time

# Poll the index description once per second until its status reports ready.
while not pc.describe_index("documents").status["ready"]:
    time.sleep(1)
Connect to Index
# Open a handle to the "documents" index; upsert/query/fetch/delete calls
# in the rest of this sheet go through this object.
index = pc.Index("documents")
# Check index stats
stats = index.describe_index_stats()
# Print the overall vector count and the per-namespace breakdown.
print(f"Total vectors: {stats.total_vector_count}")
print(f"Namespaces: {stats.namespaces}")
Upsert Vectors
# Upsert a small set of records in a single request.
# Each record has an ID, its embedding values, and optional metadata.
# The `...` inside each values list stands in for the remaining dimensions.
records = [
    {"id": "doc-1", "values": [0.1, 0.2, ...], "metadata": {"text": "RAG overview", "source": "wiki"}},
    {"id": "doc-2", "values": [0.3, 0.4, ...], "metadata": {"text": "Vector search", "source": "docs"}},
]
index.upsert(vectors=records, namespace="main")
# Batch upsert (recommended for large datasets)
from itertools import islice
def chunked(iterable, batch_size=100):
    """Yield successive lists of at most ``batch_size`` items from ``iterable``.

    Args:
        iterable: Any iterable; it is consumed lazily, one batch at a time.
        batch_size: Maximum number of items per yielded list (default 100).

    Yields:
        Lists of items in their original order. Every list is full except
        possibly the last; nothing is yielded for an empty iterable.

    Raises:
        ValueError: If ``batch_size`` is less than 1.
    """
    # A size of 0 would make the first slice empty and end the loop at once,
    # silently dropping every item; fail loudly instead.
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")
    it = iter(iterable)
    # islice pulls the next batch from the shared iterator; an empty list
    # means the iterator is exhausted and ends the loop.
    while batch := list(islice(it, batch_size)):
        yield batch
# Build one record per text; embedding_list[i] must be the embedding of texts[i].
vectors = [
    {"id": f"doc-{i}", "values": embedding_list[i], "metadata": {"text": text}}
    for i, text in enumerate(texts)
]

# Send the records 100 at a time instead of in one oversized request.
for batch in chunked(vectors, batch_size=100):
    index.upsert(vectors=batch, namespace="main")
Query
# Basic query
# `...` in the vector stands in for the remaining dimensions.
results = index.query(
    vector=[0.1, 0.2, ...],
    top_k=10,
    include_metadata=True,  # needed so match.metadata is populated below
    namespace="main",
)

# Print the ID, similarity score, and stored text of each match.
for match in results.matches:
    print(f"ID: {match.id}, Score: {match.score:.4f}, Text: {match.metadata.get('text')}")
# Query with metadata filter
# One filter dict holds both conditions: source equals "wiki" and timestamp
# is greater than 1700000000.
results = index.query(
    vector=query_embedding,
    top_k=5,
    filter={
        "source": {"$eq": "wiki"},
        "timestamp": {"$gt": 1700000000},
    },
    include_metadata=True,
    # Search the namespace the vectors were upserted into; without it the
    # query runs against the default namespace.
    namespace="main",
)

# Query by ID (fetch similar to existing vector)
results = index.query(
    id="doc-1",
    top_k=10,
    include_metadata=True,
    namespace="main",  # "doc-1" was upserted into "main"
)
Metadata Filtering
| Operator | Description | Example |
|---|---|---|
| $eq | Equal to | {"source": {"$eq": "wiki"}} |
| $ne | Not equal | {"source": {"$ne": "draft"}} |
| $gt | Greater than | {"score": {"$gt": 0.8}} |
| $gte | Greater or equal | {"year": {"$gte": 2024}} |
| $lt | Less than | {"price": {"$lt": 100}} |
| $lte | Less or equal | {"count": {"$lte": 50}} |
| $in | In array | {"tag": {"$in": ["ai", "ml"]}} |
| $nin | Not in array | {"status": {"$nin": ["deleted"]}} |
| $and | Logical AND | {"$and": [{"a": 1}, {"b": 2}]} |
| $or | Logical OR | {"$or": [{"a": 1}, {"b": 2}]} |
Fetch and Delete
# Fetch vectors by ID
result = index.fetch(
    ids=["doc-1", "doc-2"],
    namespace="main",
)

# Delete by ID
index.delete(
    ids=["doc-1", "doc-2"],
    namespace="main",
)

# Delete by filter
# NOTE(review): delete-by-filter support has differed between pod-based and
# serverless indexes; confirm it is available for your index type.
deprecated_only = {"source": {"$eq": "deprecated"}}
index.delete(filter=deprecated_only, namespace="main")

# Delete entire namespace
index.delete(delete_all=True, namespace="old_data")
Sparse-Dense Hybrid Search
# Upsert with sparse values
# NOTE(review): Pinecone's hybrid-search docs call for an index created with
# metric="dotproduct"; confirm the target index is not a cosine index.
index.upsert(
    vectors=[{
        "id": "doc-1",
        "values": dense_embedding,  # Dense vector
        "sparse_values": {
            "indices": [102, 2048, 5000],  # Non-zero dimensions
            "values": [0.5, 0.3, 0.8],  # Corresponding values
        },
        "metadata": {"text": "hybrid search example"},
    }],
    namespace="hybrid",
)

# Hybrid query
results = index.query(
    vector=dense_query,
    sparse_vector={
        "indices": [102, 5000],
        "values": [0.6, 0.4],
    },
    top_k=10,
    include_metadata=True,
    # Search the namespace the hybrid vectors were upserted into; without it
    # the query runs against the default namespace.
    namespace="hybrid",
)
Configuration
Index Management
# List indexes
indexes = pc.list_indexes()
# Print a one-line summary per index: name, dimension, metric, and status.
for idx in indexes:
    print(f"{idx.name}: {idx.dimension}d, {idx.metric}, {idx.status}")

# Describe index
desc = pc.describe_index("documents")
print(f"Host: {desc.host}")
print(f"Status: {desc.status}")
print(f"Spec: {desc.spec}")
# Configure index (pod-based only)
# Scales "documents-pod" to two replicas and moves it to the p1.x2 pod type.
pc.configure_index(name="documents-pod", replicas=2, pod_type="p1.x2")

# Delete index
# Removes the "documents" index created earlier in this sheet.
pc.delete_index("documents")
Collections (Backups)
# Create collection from index
# Collections are snapshots of pod-based indexes, so the source is the
# pod-based index rather than the serverless "documents" index.
pc.create_collection(name="docs-backup", source="documents-pod")

# List collections
collections = pc.list_collections()

# Create index from collection
# source_collection is a field of PodSpec, not an argument of create_index.
# The new index's dimension must match the vectors stored in the collection.
pc.create_index(
    name="documents-restored",
    dimension=1536,
    metric="cosine",
    spec=PodSpec(
        environment="us-east-1-aws",
        pod_type="p1.x1",
        source_collection="docs-backup",
    ),
)

# Delete collection
pc.delete_collection("docs-backup")
Advanced Usage
Pinecone Assistant (RAG)
# Create an assistant
# NOTE(review): depending on the SDK version, pc.assistant may need the
# separate assistant plugin package installed; confirm for your setup.
assistant = pc.assistant.create_assistant(
    assistant_name="doc-helper",
    instructions="Answer questions based on uploaded documents.",
)

# Upload files
assistant.upload_file(file_path="manual.pdf")
assistant.upload_file(file_path="guide.txt")

# Chat with assistant
# The conversation is a list of role/content messages; this one has one user turn.
conversation = [
    {"role": "user", "content": "How do I configure authentication?"},
]
response = assistant.chat(messages=conversation)
print(response.message.content)
Integration with LangChain
# Document is imported from langchain_core; the langchain.schema path is a
# deprecated alias.
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore

# Wrap the existing "documents" index as a LangChain vector store; texts are
# embedded with the given model and stored under the "langchain" namespace.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
vectorstore = PineconeVectorStore(
    index_name="documents",
    embedding=embeddings,
    namespace="langchain",
)

# Add documents
docs = [Document(page_content="RAG is...", metadata={"source": "wiki"})]
vectorstore.add_documents(docs)

# Search
results = vectorstore.similarity_search("What is RAG?", k=5)
REST API
# Query via REST
# The host below is a placeholder; substitute your own index's host (the
# `host` value printed from describe_index). Request fields use camelCase
# (topK, includeMetadata), unlike the Python SDK's top_k / include_metadata.
curl -X POST "https://documents-abc123.svc.us-east-1-aws.pinecone.io/query" \
-H "Api-Key: YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"vector": [0.1, 0.2, 0.3],
"topK": 10,
"includeMetadata": true,
"namespace": "main"
}'
# Upsert via REST
# Same placeholder host as above; the body carries the records and the
# namespace to write them into.
curl -X POST "https://documents-abc123.svc.us-east-1-aws.pinecone.io/vectors/upsert" \
-H "Api-Key: YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"vectors": [
{"id": "doc-1", "values": [0.1, 0.2], "metadata": {"text": "hello"}}
],
"namespace": "main"
}'
Troubleshooting
| Issue | Solution |
|---|---|
| Index not ready | Wait for creation (can take minutes), check describe_index |
| Upsert timeout | Reduce batch size to 100, use async upserts |
| Query returns no results | Check namespace, verify vectors exist with describe_index_stats |
| Dimension mismatch | Ensure vector dim matches index dim exactly |
| Rate limit exceeded | Implement exponential backoff, upgrade plan |
| Metadata too large | Keep metadata under 40KB per vector |
| High latency | Use serverless in same region as your app |
| Filter not working | Check metadata key exists and value type matches |
# Check API key
# Requests the index list with the key; a successful listing shows the key
# is accepted.
curl -H "Api-Key: YOUR_KEY" https://api.pinecone.io/indexes
# View index stats
# INDEX_HOST is a placeholder for your index's host; the request body is an
# empty JSON object.
curl -X POST "https://INDEX_HOST/describe_index_stats" \
-H "Api-Key: YOUR_KEY" \
-H "Content-Type: application/json" \
-d '{}'