Skip to content

Pinecone Cheat Sheet

Overview

Pinecone is a fully managed vector database designed for AI applications requiring fast and scalable similarity search. It handles infrastructure management, scaling, and optimization automatically, allowing developers to focus on building AI features. Pinecone supports serverless indexes (pay-per-query) and pod-based indexes (dedicated compute), with features like metadata filtering, namespace isolation, sparse-dense hybrid search, and real-time upserts.

The platform is commonly used for RAG systems, recommendation engines, semantic search, and anomaly detection. Pinecone integrates natively with LangChain, LlamaIndex, Haystack, and other AI frameworks. It offers a generous free tier for prototyping and scales to billions of vectors in production.

Installation

pip install pinecone

# Older SDK (v2)
pip install pinecone-client

# JavaScript
npm install @pinecone-database/pinecone

Core Operations

Initialize and Create Index

from pinecone import Pinecone, ServerlessSpec, PodSpec

pc = Pinecone(api_key="YOUR_API_KEY")

# Create serverless index
pc.create_index(
    name="documents",
    dimension=1536,
    metric="cosine",
    spec=ServerlessSpec(
        cloud="aws",
        region="us-east-1"
    )
)

# Create pod-based index
pc.create_index(
    name="documents-pod",
    dimension=1536,
    metric="cosine",
    spec=PodSpec(
        environment="us-east-1-aws",
        pod_type="p1.x1",
        pods=1,
        replicas=1
    )
)

# Wait for index to be ready
import time
while not pc.describe_index("documents").status["ready"]:
    time.sleep(1)

Connect to Index

index = pc.Index("documents")

# Check index stats
stats = index.describe_index_stats()
print(f"Total vectors: {stats.total_vector_count}")
print(f"Namespaces: {stats.namespaces}")

Upsert Vectors

# Single upsert
index.upsert(
    vectors=[
        {"id": "doc-1", "values": [0.1, 0.2, ...], "metadata": {"text": "RAG overview", "source": "wiki"}},
        {"id": "doc-2", "values": [0.3, 0.4, ...], "metadata": {"text": "Vector search", "source": "docs"}},
    ],
    namespace="main"
)

# Batch upsert (recommended for large datasets)
from itertools import islice

def chunked(iterable, batch_size=100):
    """Yield successive lists of at most ``batch_size`` items from ``iterable``.

    Args:
        iterable: Any iterable; it is consumed lazily, one batch at a time.
        batch_size: Maximum number of items per batch. Must be at least 1.

    Yields:
        Lists of up to ``batch_size`` items in input order; only the final
        batch may be shorter.

    Raises:
        ValueError: If ``batch_size`` is less than 1. Because this is a
            generator, the error surfaces on the first iteration.
    """
    # Without this guard a batch_size of 0 makes islice return nothing, so the
    # loop ends immediately and every item is silently dropped.
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size!r}")
    it = iter(iterable)
    # An empty list is falsy, which ends the loop once the iterator is drained.
    while batch := list(islice(it, batch_size)):
        yield batch

vectors = [
    {"id": f"doc-{i}", "values": embedding_list[i], "metadata": {"text": texts[i]}}
    for i in range(len(texts))
]

for batch in chunked(vectors, batch_size=100):
    index.upsert(vectors=batch, namespace="main")

Query

# Basic query
results = index.query(
    vector=[0.1, 0.2, ...],
    top_k=10,
    include_metadata=True,
    namespace="main"
)

for match in results.matches:
    print(f"ID: {match.id}, Score: {match.score:.4f}, Text: {match.metadata.get('text')}")

# Query with metadata filter
results = index.query(
    vector=query_embedding,
    top_k=5,
    filter={
        "source": {"$eq": "wiki"},
        "timestamp": {"$gt": 1700000000}
    },
    include_metadata=True
)

# Query by ID (fetch similar to existing vector)
results = index.query(
    id="doc-1",
    top_k=10,
    include_metadata=True
)

Metadata Filtering

| Operator | Description | Example |
|----------|-------------|---------|
| `$eq` | Equal to | `{"source": {"$eq": "wiki"}}` |
| `$ne` | Not equal | `{"source": {"$ne": "draft"}}` |
| `$gt` | Greater than | `{"score": {"$gt": 0.8}}` |
| `$gte` | Greater or equal | `{"year": {"$gte": 2024}}` |
| `$lt` | Less than | `{"price": {"$lt": 100}}` |
| `$lte` | Less or equal | `{"count": {"$lte": 50}}` |
| `$in` | In array | `{"tag": {"$in": ["ai", "ml"]}}` |
| `$nin` | Not in array | `{"status": {"$nin": ["deleted"]}}` |
| `$and` | Logical AND | `{"$and": [{"a": 1}, {"b": 2}]}` |
| `$or` | Logical OR | `{"$or": [{"a": 1}, {"b": 2}]}` |

Fetch and Delete

# Fetch vectors by ID
result = index.fetch(ids=["doc-1", "doc-2"], namespace="main")

# Delete by ID
index.delete(ids=["doc-1", "doc-2"], namespace="main")

# Delete by filter
index.delete(
    filter={"source": {"$eq": "deprecated"}},
    namespace="main"
)

# Delete entire namespace
index.delete(delete_all=True, namespace="old_data")
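
Hybrid Search (Sparse-Dense)

Sparse-dense vectors require an index created with metric="dotproduct".

# Upsert with sparse values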
index.upsert(
    vectors=[{
        "id": "doc-1",
        "values": dense_embedding,       # Dense vector
        "sparse_values": {
            "indices": [102, 2048, 5000],  # Non-zero dimensions
            "values": [0.5, 0.3, 0.8]     # Corresponding values
        },
        "metadata": {"text": "hybrid search example"}
    }],
    namespace="hybrid"
)

# Hybrid query
results = index.query(
    vector=dense_query,
    sparse_vector={
        "indices": [102, 5000],
        "values": [0.6, 0.4]
    },
    top_k=10,
    include_metadata=True
)

Configuration

Index Management

# List indexes
indexes = pc.list_indexes()
for idx in indexes:
    print(f"{idx.name}: {idx.dimension}d, {idx.metric}, {idx.status}")

# Describe index
desc = pc.describe_index("documents")
print(f"Host: {desc.host}")
print(f"Status: {desc.status}")
print(f"Spec: {desc.spec}")

# Configure index (pod-based only)
pc.configure_index(
    name="documents-pod",
    replicas=2,
    pod_type="p1.x2"
)

# Delete index
pc.delete_index("documents")

Collections (Backups)

# Collections are static snapshots of pod-based indexes, so the source must
# be a pod-based index (here "documents-pod"), not a serverless one.
# Create collection from index
pc.create_collection(name="docs-backup", source="documents-pod")

# List collections
collections = pc.list_collections()

# Create index from collection
# source_collection is a PodSpec field, not a create_index() keyword, so the
# restored index is pod-based as well.
pc.create_index(
    name="documents-restored",
    dimension=1536,
    metric="cosine",
    spec=PodSpec(
        environment="us-east-1-aws",
        pod_type="p1.x1",
        source_collection="docs-backup"
    )
)

# Delete collection
pc.delete_collection("docs-backup")

Advanced Usage

Pinecone Assistant (RAG)

# Create an assistant
assistant = pc.assistant.create_assistant(
    assistant_name="doc-helper",
    instructions="Answer questions based on uploaded documents."
)

# Upload files
assistant.upload_file(file_path="manual.pdf")
assistant.upload_file(file_path="guide.txt")

# Chat with assistant
response = assistant.chat(messages=[
    {"role": "user", "content": "How do I configure authentication?"}
])
print(response.message.content)

Integration with LangChain

from langchain_pinecone import PineconeVectorStore
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

vectorstore = PineconeVectorStore(
    index_name="documents",
    embedding=embeddings,
    namespace="langchain"
)

# Add documents
from langchain.schema import Document
docs = [Document(page_content="RAG is...", metadata={"source": "wiki"})]
vectorstore.add_documents(docs)

# Search
results = vectorstore.similarity_search("What is RAG?", k=5)

REST API

# Query via REST
curl -X POST "https://documents-abc123.svc.us-east-1-aws.pinecone.io/query" \
  -H "Api-Key: YOUR_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "vector": [0.1, 0.2, 0.3],
    "topK": 10,
    "includeMetadata": true,
    "namespace": "main"
  }'

# Upsert via REST
curl -X POST "https://documents-abc123.svc.us-east-1-aws.pinecone.io/vectors/upsert" \
  -H "Api-Key: YOUR_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "vectors": [
      {"id": "doc-1", "values": [0.1, 0.2], "metadata": {"text": "hello"}}
    ],
    "namespace": "main"
  }'

Troubleshooting

| Issue | Solution |
|-------|----------|
| Index not ready | Wait for creation (can take minutes), check describe_index |
| Upsert timeout | Reduce batch size to 100, use async upserts |
| Query returns no results | Check namespace, verify vectors exist with describe_index_stats |
| Dimension mismatch | Ensure vector dim matches index dim exactly |
| Rate limit exceeded | Implement exponential backoff, upgrade plan |
| Metadata too large | Keep metadata under 40KB per vector |
| High latency | Use serverless in same region as your app |
| Filter not working | Check metadata key exists and value type matches |

# Check API key
curl -H "Api-Key: YOUR_KEY" https://api.pinecone.io/indexes

# View index stats
curl -X POST "https://INDEX_HOST/describe_index_stats" \
  -H "Api-Key: YOUR_KEY" \
  -H "Content-Type: application/json" \
  -d '{}'