Milvus Cheat Sheet
Overview
Milvus is an open-source vector database designed for AI similarity search at scale. It supports billion-scale vector data with millisecond-level search latency through advanced indexing algorithms (HNSW, IVF, DiskANN, GPU indexes). Milvus provides a distributed architecture with separation of storage and compute, supporting both standalone and cluster deployments with horizontal scalability.
The database natively supports multiple vector types (dense, sparse, binary), scalar filtering alongside vector search, hybrid search combining multiple retrieval strategies, and multi-tenancy. It integrates with popular AI frameworks like LangChain, LlamaIndex, and Haystack, making it a foundational component for RAG systems, recommendation engines, and image/video search applications.
Installation
Docker (Standalone)
# Download and run
curl -sfL https://raw.githubusercontent.com/milvus-io/milvus/master/scripts/standalone_embed.sh -o standalone_embed.sh
bash standalone_embed.sh start
# Or with Docker Compose
wget https://github.com/milvus-io/milvus/releases/download/v2.4.0/milvus-standalone-docker-compose.yml -O docker-compose.yml
docker compose up -d
# Milvus listens on port 19530 (gRPC / client SDK traffic) and 9091 (HTTP health check and metrics)
Docker Compose (Full)
version: '3.8'
services:
etcd:
image: quay.io/coreos/etcd:v3.5.5
environment:
- ETCD_AUTO_COMPACTION_MODE=revision
- ETCD_AUTO_COMPACTION_RETENTION=1000
volumes:
- etcd_data:/etcd
minio:
image: minio/minio:latest
environment:
MINIO_ACCESS_KEY: minioadmin
MINIO_SECRET_KEY: minioadmin
command: minio server /minio_data
volumes:
- minio_data:/minio_data
milvus:
image: milvusdb/milvus:v2.4-latest
command: ["milvus", "run", "standalone"]
environment:
ETCD_ENDPOINTS: etcd:2379
MINIO_ADDRESS: minio:9000
ports:
- "19530:19530"
- "9091:9091"
depends_on:
- etcd
- minio
volumes:
- milvus_data:/var/lib/milvus
volumes:
etcd_data:
minio_data:
milvus_data:
Python SDK
pip install pymilvus
# With model support (embeddings)
pip install "pymilvus[model]"
Core Operations
Connect and Create Collection
from pymilvus import MilvusClient
# Connect to Milvus
client = MilvusClient(uri="http://localhost:19530")
# Create collection with auto schema
client.create_collection(
collection_name="documents",
dimension=1536, # OpenAI embedding dimension
metric_type="COSINE"
)
# Create collection with custom schema
from pymilvus import CollectionSchema, FieldSchema, DataType
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=1536),
FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="source", dtype=DataType.VARCHAR, max_length=256),
FieldSchema(name="timestamp", dtype=DataType.INT64),
]
schema = CollectionSchema(fields=fields, description="Document embeddings")
client.create_collection(
collection_name="documents_custom",
schema=schema
)
Insert Data
import numpy as np
# Insert with auto-generated IDs
data = [
{"embedding": np.random.rand(1536).tolist(), "text": "RAG overview", "source": "wiki", "timestamp": 1700000000},
{"embedding": np.random.rand(1536).tolist(), "text": "Vector search", "source": "docs", "timestamp": 1700000100},
]
client.insert(collection_name="documents", data=data)
# Batch insert (many rows in a single insert call)
vectors = np.random.rand(10000, 1536).tolist()
texts = [f"Document {i}" for i in range(10000)]
data = [{"embedding": v, "text": t, "source": "batch", "timestamp": 0} for v, t in zip(vectors, texts)]
client.insert(collection_name="documents", data=data)
Search
# Basic vector search
query_vector = np.random.rand(1536).tolist()
results = client.search(
collection_name="documents",
data=[query_vector],
limit=10,
output_fields=["text", "source"]
)
for hits in results:
for hit in hits:
print(f"ID: {hit['id']}, Distance: {hit['distance']:.4f}, Text: {hit['entity']['text']}")
# Search with scalar filtering
results = client.search(
collection_name="documents",
data=[query_vector],
limit=5,
filter='source == "wiki" and timestamp > 1700000000',
output_fields=["text", "source", "timestamp"]
)
# Multi-vector search (hybrid)
from pymilvus import AnnSearchRequest, RRFRanker
req1 = AnnSearchRequest(data=[dense_vector], anns_field="dense_embedding", param={"metric_type": "COSINE"}, limit=20)
req2 = AnnSearchRequest(data=[sparse_vector], anns_field="sparse_embedding", param={"metric_type": "IP"}, limit=20)
results = client.hybrid_search(
collection_name="hybrid_docs",
reqs=[req1, req2],
ranker=RRFRanker(k=60),
limit=10,
output_fields=["text"]
)
Index Types
| Index | Type | Best For | Memory |
|---|---|---|---|
| FLAT | Brute force | Small datasets (<1M) | High |
| IVF_FLAT | Inverted file | Medium datasets | Medium |
| IVF_SQ8 | Scalar quantization | Balance speed/accuracy | Low |
| IVF_PQ | Product quantization | Large datasets | Very low |
| HNSW | Graph-based | Low latency search | High |
| DISKANN | Disk-based | Billion-scale | Very low |
| GPU_IVF_FLAT | GPU accelerated | High throughput | GPU |
| GPU_CAGRA | GPU graph | Fastest GPU search | GPU |
# Create index
client.create_index(
collection_name="documents",
field_name="embedding",
index_params={
"index_type": "HNSW",
"metric_type": "COSINE",
"params": {"M": 16, "efConstruction": 256}
}
)
Configuration
Search Parameters
# HNSW search params
search_params = {"metric_type": "COSINE", "params": {"ef": 128}}
# IVF search params
search_params = {"metric_type": "L2", "params": {"nprobe": 32}}
# DiskANN search params
search_params = {"metric_type": "COSINE", "params": {"search_list": 100}}
results = client.search(
collection_name="documents",
data=[query_vector],
limit=10,
search_params=search_params
)
Collection Management
# List collections
collections = client.list_collections()
# Get collection info
info = client.describe_collection("documents")
print(f"Rows: {info['row_count']}")
# Drop collection
client.drop_collection("documents")
# Create partition
client.create_partition(collection_name="documents", partition_name="2024")
# Insert into partition
client.insert(collection_name="documents", data=data, partition_name="2024")
Advanced Usage
Sparse Vectors (BM25/SPLADE)
from pymilvus import FieldSchema, DataType
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
FieldSchema(name="dense", dtype=DataType.FLOAT_VECTOR, dim=1536),
FieldSchema(name="sparse", dtype=DataType.SPARSE_FLOAT_VECTOR),
FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=65535),
]
# Insert sparse vectors
sparse_data = [{0: 0.5, 102: 0.3, 2048: 0.8}] # {dimension_index: value}
Built-in Embedding Functions
from pymilvus.model.hybrid import BGEM3EmbeddingFunction
ef = BGEM3EmbeddingFunction(model_name="BAAI/bge-m3", device="cpu")
docs = ["What is RAG?", "Vector databases store embeddings."]
embeddings = ef.encode_documents(docs)
# Returns both dense and sparse vectors
print(f"Dense shape: {embeddings['dense'][0].shape}")
print(f"Sparse keys: {len(embeddings['sparse'][0])}")
Role-Based Access Control
from pymilvus import MilvusClient
client = MilvusClient(uri="http://localhost:19530", token="root:Milvus")
# Create role
client.create_role("reader")
client.grant_privilege("reader", "Collection", "documents", "Search")
client.grant_privilege("reader", "Collection", "documents", "Query")
# Create user
client.create_user("analyst", "password123")
client.grant_role("analyst", "reader")
Troubleshooting
| Issue | Solution |
|---|---|
| Connection refused on 19530 | Check Milvus is running: docker ps, verify port mapping |
| Insert performance slow | Increase batch size to 10000+, use bulk insert API |
| Search latency high | Build an appropriate index (e.g. HNSW), lower ef/nprobe params (higher values improve recall but slow down search) |
| Out of memory | Use IVF_PQ or DiskANN index, enable mmap |
| Dimension mismatch error | Ensure query vector dim matches collection field dim |
| etcd connection issues | Check etcd health: etcdctl endpoint health |
| MinIO storage full | Expand MinIO volume, run compaction |
| Query returns empty | Check collection is loaded: client.load_collection() |
# Check Milvus health
curl http://localhost:9091/healthz
# View metrics
curl http://localhost:9091/metrics
# Check logs
docker logs milvus-standalone
# Compact collection (reclaim space)
# Via Python: client.compact("documents")