Solr
Apache Solr is an open-source search server, built on the Apache Lucene library, that provides full-text search and navigation features for applications.
Installation
Linux Installation
# Download Solr
wget https://archive.apache.org/dist/solr/solr/9.0.0/solr-9.0.0.tgz
tar xzf solr-9.0.0.tgz
cd solr-9.0.0
# Install (the installer script must be run as root)
sudo bash ./bin/install_solr_service.sh ../solr-9.0.0.tgz
# Start service
sudo systemctl start solr
sudo systemctl enable solr
# Status
sudo systemctl status solr
# Access: http://localhost:8983/solr
Docker
docker run -d \
--name solr \
-p 8983:8983 \
solr:9.0.0
# Execute commands
docker exec -it solr solr status
Docker Compose
version: '3'
services:
  zookeeper:
    image: zookeeper:3.8
    ports:
      - "2181:2181"
  solr:
    image: solr:9.0.0
    ports:
      - "8983:8983"
    environment:
      - ZK_HOST=zookeeper:2181
    depends_on:
      - zookeeper
Collection Management
Create Collection
# Standalone collection
solr create -c products -d _default
# With custom config
solr create -c articles \
-d /path/to/config_dir \
-s 2 -rf 2 # shards and replicas
# Via API
curl -X POST http://localhost:8983/api/collections \
-H "Content-Type: application/json" \
-d '{
"create": {
"name": "products",
"numShards": 1,
"replicationFactor": 1,
"config": "_default"
}
}'
Manage Collections
# List collections
curl -s http://localhost:8983/api/collections | jq '.collections'
# Get collection info
curl -s http://localhost:8983/api/collections/products | jq '.'
# Reload collection
curl -X POST http://localhost:8983/api/collections/products/reload
# Delete collection
curl -X DELETE http://localhost:8983/api/collections/products
Indexing Documents
Index JSON Documents
# Single document
curl -X POST http://localhost:8983/solr/products/update \
-H "Content-Type: application/json" \
-d '[{"id": "1", "name": "Laptop", "price": 999.99}]'
# Bulk documents
curl -X POST http://localhost:8983/solr/products/update \
-H "Content-Type: application/json" \
-d '[
{"id": "1", "name": "Laptop", "price": 999.99},
{"id": "2", "name": "Mouse", "price": 29.99},
{"id": "3", "name": "Keyboard", "price": 79.99}
]'
# Commit changes
curl -X POST http://localhost:8983/solr/products/update \
-H "Content-Type: application/json" \
-d '{"commit":{}}'
Index CSV Data
# CSV import
curl -X POST http://localhost:8983/solr/products/update/csv \
-H "Content-Type: application/csv" \
--data-binary @products.csv
# Commit
curl http://localhost:8983/solr/products/update?commit=true
Delete Documents
# Delete by ID
curl -X POST http://localhost:8983/solr/products/update \
-H "Content-Type: application/json" \
-d '{"delete":{"id":"1"}}'
# Delete by query
curl -X POST http://localhost:8983/solr/products/update \
-H "Content-Type: application/json" \
-d '{"delete":{"query":"price:[0 TO 50]"}}'
# Clear all
curl -X POST http://localhost:8983/solr/products/update \
-H "Content-Type: application/json" \
-d '{"delete":{"query":"*:*"}}'
Search Queries
Basic Search
# Simple query
curl "http://localhost:8983/solr/products/select?q=laptop"
# Field-specific search
curl "http://localhost:8983/solr/products/select?q=name:laptop"
# Multiple fields
curl "http://localhost:8983/solr/products/select?q=laptop&defType=edismax&qf=name+description"
# Exact phrase
curl -G "http://localhost:8983/solr/products/select" --data-urlencode 'q="gaming laptop"'
# Range query
curl -G "http://localhost:8983/solr/products/select" --data-urlencode 'q=price:[500 TO 1000]'
Advanced Query Syntax
# Boolean operators
curl -G "http://localhost:8983/solr/products/select" --data-urlencode 'q=laptop AND gaming'
curl -G "http://localhost:8983/solr/products/select" --data-urlencode 'q=laptop OR tablet'
curl -G "http://localhost:8983/solr/products/select" --data-urlencode 'q=laptop NOT gaming'
# Wildcard
curl "http://localhost:8983/solr/products/select?q=lapt*"
# Fuzzy search (typo tolerance)
curl "http://localhost:8983/solr/products/select?q=lapto~"
# Boost relevance
curl -G "http://localhost:8983/solr/products/select" --data-urlencode 'q=laptop^3 OR tablet'
Filtering Results
# Filter query (doesn't affect score)
curl -G "http://localhost:8983/solr/products/select" --data-urlencode 'q=laptop' --data-urlencode 'fq=price:[100 TO 500]' --data-urlencode 'fq=in_stock:true'
# Pagination
curl "http://localhost:8983/solr/products/select?q=laptop&start=0&rows=10"
# Sort
curl "http://localhost:8983/solr/products/select?q=laptop&sort=price+asc"
# Return specific fields
curl "http://localhost:8983/solr/products/select?q=laptop&fl=id,name,price"
Faceting and Analytics
Configure Facets
# Field faceting
curl "http://localhost:8983/solr/products/select?q=laptop&facet=true&facet.field=brand&facet.field=category"
# Range faceting
curl "http://localhost:8983/solr/products/select?q=laptop&facet=true&facet.range=price&facet.range.start=0&facet.range.end=1000&facet.range.gap=100"
# Query faceting
curl -G "http://localhost:8983/solr/products/select" --data-urlencode 'q=laptop' --data-urlencode 'facet=true' --data-urlencode 'facet.query=price:[0 TO 500]' --data-urlencode 'facet.query=price:[500 TO 1000]'
# Facet limit/sorting
curl "http://localhost:8983/solr/products/select?q=laptop&facet=true&facet.field=brand&facet.limit=10&facet.sort=count"
Schema Management
View Schema
# Get schema info
curl -s http://localhost:8983/api/collections/products/schema | jq '.schema'
# Get field definitions
curl -s http://localhost:8983/api/collections/products/schema/fields | jq '.fields'
# Get field type
curl -s http://localhost:8983/api/collections/products/schema/fieldtypes | jq '.fieldTypes[] | .name'
Add Fields
# Add new field
curl -X POST http://localhost:8983/api/collections/products/schema \
-H "Content-Type: application/json" \
-d '{
"add-field": {
"name": "color",
"type": "string",
"stored": true,
"indexed": true
}
}'
# Add dynamic field
curl -X POST http://localhost:8983/api/collections/products/schema \
-H "Content-Type: application/json" \
-d '{
"add-dynamic-field": {
"name": "*_text",
"type": "text_general",
"stored": true,
"indexed": true
}
}'
Performance Tuning
Cache Configuration
# Check cache stats
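curl -s "http://localhost:8983/solr/products/admin/mbeans?cat=CACHE&stats=true&wt=json" | jq '."solr-mbeans"'
# Clear caches (Solr has no flush endpoint; caches are discarded when a new searcher opens, e.g. after a commit or a reload)
curl "http://localhost:8983/solr/admin/collections?action=RELOAD&name=products"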
# Cache config (solrconfig.xml)
<queryResultCache
class="solr.CaffeineCache"
size="512"
initialSize="512"
autowarmCount="32"
/>
Commit Optimization
# Commit with optimization
curl -X POST http://localhost:8983/solr/products/update \
-H "Content-Type: application/json" \
-d '{"optimize":{"waitSearcher":true}}'
# Soft commit (near real-time)
curl -X POST http://localhost:8983/solr/products/update \
-H "Content-Type: application/json" \
-d '{"commit":{"softCommit":true}}'
Client Libraries
SolrJ (Java)
SolrClient solr = new HttpSolrClient.Builder("http://localhost:8983/solr/products").build();
// Add document
SolrInputDocument doc = new SolrInputDocument();
doc.addField("id", "1");
doc.addField("name", "Laptop");
doc.addField("price", 999.99);
solr.add(doc);
solr.commit();
// Search
SolrQuery query = new SolrQuery("laptop");
query.setRows(10);
QueryResponse response = solr.query(query);
Python (pysolr)
import pysolr
solr = pysolr.Solr('http://localhost:8983/solr/products', always_commit=True)
# Index
solr.add([
{"id": "1", "name": "Laptop", "price": 999.99},
{"id": "2", "name": "Mouse", "price": 29.99}
])
# Search
results = solr.search('laptop', **{'rows': 10})
for result in results:
    print(result)
JavaScript/Node.js
const solr = require('solr-client');
const client = solr.createClient({
  host: 'localhost',
  port: 8983,
  core: 'products',
  path: '/solr'
});
// Add documents
client.add([
  {id: "1", name: "Laptop", price: 999.99},
  {id: "2", name: "Mouse", price: 29.99}
], (err, resp) => {
  if (err) throw err;
  client.search('q=laptop', (err, obj) => {
    if (err) throw err;
    console.log(obj.responseHeader);
  });
});
Monitoring and Administration
Health Check
# System info
curl -s http://localhost:8983/api/node/system | jq '.system'
# Collection status
curl -s "http://localhost:8983/solr/admin/collections?action=CLUSTERSTATUS" | jq '.cluster'
# Core status
curl -s http://localhost:8983/solr/admin/cores | jq '.status'
# Index stats
curl -s "http://localhost:8983/solr/products/admin/luke?numTerms=0" | jq '.index'
Backup and Restore
# Create backup
curl "http://localhost:8983/solr/products/replication?command=backup&location=/backups&name=20250330"
# Restore from backup (name must match the name given when the backup was created)
curl "http://localhost:8983/solr/products/replication?command=restore&location=/backups&name=20250330"
Best Practices
- Use appropriate field types (text_general, pint, pdate, etc.)
- Configure sharding for large datasets
- Use filter queries to reduce full-text search scope
- Implement soft commits for near real-time search
- Monitor cache hit rates and tune sizes
- Backup regularly
- Use faceting for navigation UI
- Implement security with authentication
- Monitor disk space and index size
- Keep the transaction log (updateLog) enabled for durability and crash recovery