Skip to content

Solr

Apache Solr is an open-source search server, built on the Apache Lucene library, that provides full-text search, faceting, and navigation features for applications.

Installation

Linux Installation

# Download Solr
wget https://archive.apache.org/dist/solr/solr/9.0.0/solr-9.0.0.tgz
tar xzf solr-9.0.0.tgz
cd solr-9.0.0

# Install as a service (requires root)
sudo bash ./bin/install_solr_service.sh ../solr-9.0.0.tgz

# Start service
sudo systemctl start solr
sudo systemctl enable solr

# Status
sudo systemctl status solr

# Access: http://localhost:8983/solr

Docker

docker run -d \
  --name solr \
  -p 8983:8983 \
  solr:9.0.0

# Execute commands
docker exec -it solr solr status

Docker Compose

version: '3'
services:
  zookeeper:
    image: zookeeper:3.8
    ports:
      - "2181:2181"

  solr:
    image: solr:9.0.0
    ports:
      - "8983:8983"
    environment:
      - ZK_HOST=zookeeper:2181
    depends_on:
      - zookeeper

Collection Management

Create Collection

# Standalone collection (uses the built-in _default configset)
solr create -c products -d _default

# With custom config
solr create -c articles \
  -d /path/to/config_dir \
  -s 2 -rf 2  # shards and replicas

# Via API (Collections API, SolrCloud mode)
curl "http://localhost:8983/solr/admin/collections?action=CREATE&name=products&numShards=1&replicationFactor=1&collection.configName=_default"

Manage Collections

# List collections
curl -s http://localhost:8983/api/collections | jq '.collections'

# Get collection info
curl -s http://localhost:8983/api/collections/products | jq '.'

# Reload collection
curl -X POST http://localhost:8983/api/collections/products/reload

# Delete collection
curl -X DELETE http://localhost:8983/api/collections/products

Indexing Documents

Index JSON Documents

# Single document
curl -X POST http://localhost:8983/solr/products/update \
  -H "Content-Type: application/json" \
  -d '[{"id": "1", "name": "Laptop", "price": 999.99}]'

# Bulk documents
curl -X POST http://localhost:8983/solr/products/update \
  -H "Content-Type: application/json" \
  -d '[
    {"id": "1", "name": "Laptop", "price": 999.99},
    {"id": "2", "name": "Mouse", "price": 29.99},
    {"id": "3", "name": "Keyboard", "price": 79.99}
  ]'

# Commit changes
curl -X POST http://localhost:8983/solr/products/update \
  -H "Content-Type: application/json" \
  -d '{"commit":{}}'

Index CSV Data

# CSV import
curl -X POST http://localhost:8983/solr/products/update/csv \
  -H "Content-Type: application/csv" \
  --data-binary @products.csv

# Commit
curl "http://localhost:8983/solr/products/update?commit=true"

Delete Documents

# Delete by ID
curl -X POST http://localhost:8983/solr/products/update \
  -H "Content-Type: application/json" \
  -d '{"delete":{"id":"1"}}'

# Delete by query
curl -X POST http://localhost:8983/solr/products/update \
  -H "Content-Type: application/json" \
  -d '{"delete":{"query":"price:[0 TO 50]"}}'

# Clear all
curl -X POST http://localhost:8983/solr/products/update \
  -H "Content-Type: application/json" \
  -d '{"delete":{"query":"*:*"}}'

Search Queries

# Simple query
curl "http://localhost:8983/solr/products/select?q=laptop"

# Field-specific search
curl "http://localhost:8983/solr/products/select?q=name:laptop"

# Multiple fields (df accepts only one field; use edismax with qf to search several)
curl "http://localhost:8983/solr/products/select?q=laptop&defType=edismax&qf=name+description"

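# Exact phrase (URL-encode quotes and spaces)
curl -G "http://localhost:8983/solr/products/select" --data-urlencode 'q="gaming laptop"'

# Range query (URL-encode brackets and spaces)
curl -G "http://localhost:8983/solr/products/select" --data-urlencode 'q=price:[500 TO 1000]'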

Advanced Query Syntax

# Boolean operators (URL-encode the spaces)
curl -G "http://localhost:8983/solr/products/select" --data-urlencode 'q=laptop AND gaming'
curl -G "http://localhost:8983/solr/products/select" --data-urlencode 'q=laptop OR tablet'
curl -G "http://localhost:8983/solr/products/select" --data-urlencode 'q=laptop NOT gaming'

# Wildcard
curl "http://localhost:8983/solr/products/select?q=lapt*"

# Fuzzy search (typo tolerance)
curl "http://localhost:8983/solr/products/select?q=lapto~"

# Boost relevance
curl -G "http://localhost:8983/solr/products/select" --data-urlencode 'q=laptop^3 OR tablet'

Filtering Results

# Filter query (doesn't affect score)
curl -G "http://localhost:8983/solr/products/select" \
  --data-urlencode 'q=laptop' \
  --data-urlencode 'fq=price:[100 TO 500]' \
  --data-urlencode 'fq=in_stock:true'

# Pagination
curl "http://localhost:8983/solr/products/select?q=laptop&start=0&rows=10"

# Sort
curl "http://localhost:8983/solr/products/select?q=laptop&sort=price+asc"

# Return specific fields
curl "http://localhost:8983/solr/products/select?q=laptop&fl=id,name,price"

Faceting and Analytics

Configure Facets

# Field faceting
curl "http://localhost:8983/solr/products/select?q=laptop&facet=true&facet.field=brand&facet.field=category"

# Range faceting
curl "http://localhost:8983/solr/products/select?q=laptop&facet=true&facet.range=price&facet.range.start=0&facet.range.end=1000&facet.range.gap=100"

# Query faceting
curl -G "http://localhost:8983/solr/products/select" \
  --data-urlencode 'q=laptop' \
  --data-urlencode 'facet=true' \
  --data-urlencode 'facet.query=price:[0 TO 500]' \
  --data-urlencode 'facet.query=price:[500 TO 1000]'

# Facet limit/sorting
curl "http://localhost:8983/solr/products/select?q=laptop&facet=true&facet.field=brand&facet.limit=10&facet.sort=count"

Schema Management

View Schema

# Get schema info
curl -s http://localhost:8983/api/collections/products/schema | jq '.schema'

# Get field definitions
curl -s http://localhost:8983/api/collections/products/schema/fields | jq '.fields'

# Get field type
curl -s http://localhost:8983/api/collections/products/schema/fieldtypes | jq '.fieldTypes[] | .name'

Add Fields

# Add new field
curl -X POST http://localhost:8983/api/collections/products/schema \
  -H "Content-Type: application/json" \
  -d '{
    "add-field": {
      "name": "color",
      "type": "string",
      "stored": true,
      "indexed": true
    }
  }'

# Add dynamic field
curl -X POST http://localhost:8983/api/collections/products/schema \
  -H "Content-Type: application/json" \
  -d '{
    "add-dynamic-field": {
      "name": "*_text",
      "type": "text_general",
      "stored": true,
      "indexed": true
    }
  }'

Performance Tuning

Cache Configuration

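# Check cache stats (MBeans handler)
curl -s "http://localhost:8983/solr/products/admin/mbeans?cat=CACHE&stats=true&wt=json" | jq '.["solr-mbeans"]'

# Clear caches (there is no flush endpoint; reloading opens a new searcher with fresh caches)
curl "http://localhost:8983/solr/admin/collections?action=RELOAD&name=products"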

# Cache config (solrconfig.xml; Solr 9 removed LRUCache in favor of CaffeineCache)
<queryResultCache
  class="solr.CaffeineCache"
  size="512"
  initialSize="512"
  autowarmCount="32"
/>

Commit Optimization

# Optimize (force-merges segments and commits; expensive, use sparingly)
curl -X POST http://localhost:8983/solr/products/update \
  -H "Content-Type: application/json" \
  -d '{"optimize":{"waitSearcher":true}}'

# Soft commit (near real-time)
curl -X POST http://localhost:8983/solr/products/update \
  -H "Content-Type: application/json" \
  -d '{"commit":{"softCommit":true}}'

Client Libraries

SolrJ (Java)

SolrClient solr = new HttpSolrClient.Builder("http://localhost:8983/solr/products").build();

// Add document
SolrInputDocument doc = new SolrInputDocument();
doc.addField("id", "1");
doc.addField("name", "Laptop");
doc.addField("price", 999.99);
solr.add(doc);
solr.commit();

// Search
SolrQuery query = new SolrQuery("laptop");
query.setRows(10);
QueryResponse response = solr.query(query);

Python (pysolr)

import pysolr

solr = pysolr.Solr('http://localhost:8983/solr/products')

# Index
solr.add([
    {"id": "1", "name": "Laptop", "price": 999.99},
    {"id": "2", "name": "Mouse", "price": 29.99}
])

# Search
results = solr.search('laptop', **{'rows': 10})
for result in results:
    print(result)

JavaScript/Node.js

const solr = require('solr-client');
const client = solr.createClient({
  host: 'localhost',
  port: 8983,
  core: 'products',
  path: '/solr'
});

(async () => {
  try {
    // Add documents
    await client.add([
      {id: "1", name: "Laptop", price: 999.99},
      {id: "2", name: "Mouse", price: 29.99}
    ]);
    await client.commit();

    // Search
    const result = await client.search('q=laptop&rows=10');
    console.log(result.response.docs);
  } catch (err) {
    console.error(err);
  }
})();

Monitoring and Administration

Health Check

# System info
curl -s http://localhost:8983/api/node/system | jq '.system'

# Collection status (SolrCloud)
curl -s "http://localhost:8983/solr/admin/collections?action=CLUSTERSTATUS" | jq '.cluster'

# Core status
curl -s http://localhost:8983/solr/admin/cores | jq '.status'

# Index stats (Luke request handler)
curl -s "http://localhost:8983/solr/products/admin/luke?numTerms=0" | jq '.index'

Backup and Restore

# Create backup
curl "http://localhost:8983/solr/products/replication?command=backup&location=/backups"

# Restore from backup
curl "http://localhost:8983/solr/products/replication?command=restore&location=/backups&name=snapshot.20250330"

Best Practices

  • Use appropriate field types (text_general, int, date, etc.)
  • Configure sharding for large datasets
  • Use filter queries to reduce full-text search scope
  • Implement soft commits for near real-time search
  • Monitor cache hit rates and tune sizes
  • Backup regularly
  • Use faceting for navigation UI
  • Implement security with authentication
  • Monitor disk space and index size
  • Keep the transaction log (updateLog) enabled for durability and recovery