Skip to content

Milvus Cheat Sheet

Overview

Milvus is an open-source vector database designed for AI similarity search at scale. It supports billion-scale vector data with millisecond-level search latency through advanced indexing algorithms (HNSW, IVF, DiskANN, GPU indexes). Milvus provides a distributed architecture with separation of storage and compute, supporting both standalone and cluster deployments with horizontal scalability.

The database natively supports multiple vector types (dense, sparse, binary), scalar filtering alongside vector search, hybrid search combining multiple retrieval strategies, and multi-tenancy. It integrates with popular AI frameworks like LangChain, LlamaIndex, and Haystack, making it a foundational component for RAG systems, recommendation engines, and image/video search applications.

Installation

Docker (Standalone)

# Download and run
curl -sfL https://raw.githubusercontent.com/milvus-io/milvus/master/scripts/standalone_embed.sh -o standalone_embed.sh
bash standalone_embed.sh start

# Or with Docker Compose
wget https://github.com/milvus-io/milvus/releases/download/v2.4.0/milvus-standalone-docker-compose.yml -O docker-compose.yml
docker compose up -d

# Milvus listens on port 19530 (gRPC) and 9091 (HTTP)

Docker Compose (Full)

version: '3.8'
services:
  etcd:
    image: quay.io/coreos/etcd:v3.5.5
    environment:
      - ETCD_AUTO_COMPACTION_MODE=revision
      - ETCD_AUTO_COMPACTION_RETENTION=1000
    volumes:
      - etcd_data:/etcd

  minio:
    image: minio/minio:latest
    environment:
      MINIO_ACCESS_KEY: minioadmin
      MINIO_SECRET_KEY: minioadmin
    command: minio server /minio_data
    volumes:
      - minio_data:/minio_data

  milvus:
    image: milvusdb/milvus:v2.4-latest
    command: ["milvus", "run", "standalone"]
    environment:
      ETCD_ENDPOINTS: etcd:2379
      MINIO_ADDRESS: minio:9000
    ports:
      - "19530:19530"
      - "9091:9091"
    depends_on:
      - etcd
      - minio
    volumes:
      - milvus_data:/var/lib/milvus

volumes:
  etcd_data:
  minio_data:
  milvus_data:

Python SDK

pip install pymilvus

# With model support (embeddings)
pip install "pymilvus[model]"

Core Operations

Connect and Create Collection

from pymilvus import MilvusClient

# Connect to Milvus
client = MilvusClient(uri="http://localhost:19530")

# Create collection with auto schema
client.create_collection(
    collection_name="documents",
    dimension=1536,  # OpenAI embedding dimension
    metric_type="COSINE"
)

# Create collection with custom schema
from pymilvus import CollectionSchema, FieldSchema, DataType

fields = [
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
    FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=1536),
    FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=65535),
    FieldSchema(name="source", dtype=DataType.VARCHAR, max_length=256),
    FieldSchema(name="timestamp", dtype=DataType.INT64),
]
schema = CollectionSchema(fields=fields, description="Document embeddings")

client.create_collection(
    collection_name="documents_custom",
    schema=schema
)

Insert Data

import numpy as np

# Insert with auto-generated IDs
data = [
    {"embedding": np.random.rand(1536).tolist(), "text": "RAG overview", "source": "wiki", "timestamp": 1700000000},
    {"embedding": np.random.rand(1536).tolist(), "text": "Vector search", "source": "docs", "timestamp": 1700000100},
]
client.insert(collection_name="documents", data=data)

# Bulk insert
vectors = np.random.rand(10000, 1536).tolist()
texts = [f"Document {i}" for i in range(10000)]
data = [{"embedding": v, "text": t, "source": "batch", "timestamp": 0} for v, t in zip(vectors, texts)]
client.insert(collection_name="documents", data=data)

Search

# Basic vector search
query_vector = np.random.rand(1536).tolist()
results = client.search(
    collection_name="documents",
    data=[query_vector],
    limit=10,
    output_fields=["text", "source"]
)
for hits in results:
    for hit in hits:
        print(f"ID: {hit['id']}, Distance: {hit['distance']:.4f}, Text: {hit['entity']['text']}")

# Search with scalar filtering
results = client.search(
    collection_name="documents",
    data=[query_vector],
    limit=5,
    filter='source == "wiki" and timestamp > 1700000000',
    output_fields=["text", "source", "timestamp"]
)

# Multi-vector search (hybrid)
from pymilvus import AnnSearchRequest, RRFRanker

req1 = AnnSearchRequest(data=[dense_vector], anns_field="dense_embedding", param={"metric_type": "COSINE"}, limit=20)
req2 = AnnSearchRequest(data=[sparse_vector], anns_field="sparse_embedding", param={"metric_type": "IP"}, limit=20)

results = client.hybrid_search(
    collection_name="hybrid_docs",
    reqs=[req1, req2],
    ranker=RRFRanker(k=60),
    limit=10,
    output_fields=["text"]
)

Index Types

| Index | Type | Best For | Memory |
|---|---|---|---|
| FLAT | Brute force | Small datasets (<1M) | High |
| IVF_FLAT | Inverted file | Medium datasets | Medium |
| IVF_SQ8 | Scalar quantization | Balance speed/accuracy | Low |
| IVF_PQ | Product quantization | Large datasets | Very low |
| HNSW | Graph-based | Low latency search | High |
| DISKANN | Disk-based | Billion-scale | Very low |
| GPU_IVF_FLAT | GPU accelerated | High throughput | GPU |
| GPU_CAGRA | GPU graph | Fastest GPU search | GPU |

# Create index
client.create_index(
    collection_name="documents",
    field_name="embedding",
    index_params={
        "index_type": "HNSW",
        "metric_type": "COSINE",
        "params": {"M": 16, "efConstruction": 256}
    }
)

Configuration

Search Parameters

# HNSW search params
search_params = {"metric_type": "COSINE", "params": {"ef": 128}}

# IVF search params
search_params = {"metric_type": "L2", "params": {"nprobe": 32}}

# DiskANN search params
search_params = {"metric_type": "COSINE", "params": {"search_list": 100}}

results = client.search(
    collection_name="documents",
    data=[query_vector],
    limit=10,
    search_params=search_params
)

Collection Management

# List collections
collections = client.list_collections()

# Get collection info
info = client.describe_collection("documents")
print(f"Rows: {info['row_count']}")

# Drop collection
client.drop_collection("documents")

# Create partition
client.create_partition(collection_name="documents", partition_name="2024")

# Insert into partition
client.insert(collection_name="documents", data=data, partition_name="2024")

Advanced Usage

Sparse Vectors (BM25/SPLADE)

from pymilvus import FieldSchema, DataType

fields = [
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
    FieldSchema(name="dense", dtype=DataType.FLOAT_VECTOR, dim=1536),
    FieldSchema(name="sparse", dtype=DataType.SPARSE_FLOAT_VECTOR),
    FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=65535),
]

# Insert sparse vectors
sparse_data = [{0: 0.5, 102: 0.3, 2048: 0.8}]  # {dimension_index: value}

Built-in Embedding Functions

from pymilvus.model.hybrid import BGEM3EmbeddingFunction

ef = BGEM3EmbeddingFunction(model_name="BAAI/bge-m3", device="cpu")
docs = ["What is RAG?", "Vector databases store embeddings."]
embeddings = ef.encode_documents(docs)

# Returns both dense and sparse vectors
print(f"Dense shape: {embeddings['dense'][0].shape}")
print(f"Sparse keys: {len(embeddings['sparse'][0])}")

Role-Based Access Control

from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530", token="root:Milvus")

# Create role
client.create_role("reader")
client.grant_privilege("reader", "Collection", "documents", "Search")
client.grant_privilege("reader", "Collection", "documents", "Query")

# Create user
client.create_user("analyst", "password123")
client.grant_role("analyst", "reader")

Troubleshooting

| Issue | Solution |
|---|---|
| Connection refused on 19530 | Check Milvus is running: docker ps, verify port mapping |
| Insert performance slow | Increase batch size to 10000+, use bulk insert API |
| Search latency high | Build appropriate index, increase ef/nprobe params |
| Out of memory | Use IVF_PQ or DiskANN index, enable mmap |
| Dimension mismatch error | Ensure query vector dim matches collection field dim |
| etcd connection issues | Check etcd health: etcdctl endpoint health |
| MinIO storage full | Expand MinIO volume, run compaction |
| Query returns empty | Check collection is loaded: client.load_collection() |

# Check Milvus health
curl http://localhost:9091/healthz

# View metrics
curl http://localhost:9091/metrics

# Check logs
docker logs milvus-standalone

# Compact collection (reclaim space)
# Via Python: client.compact("documents")