Sumo Logic Cheatsheet
Sumo Logic is a cloud-native machine data analytics platform that provides real-time insight into application, infrastructure, and security data. Delivered as Software-as-a-Service (SaaS), it enables organizations to collect, search, and analyze massive volumes of structured and unstructured data from across the entire technology stack, providing visibility for operational intelligence, security monitoring, and business analytics.
Platform Overview
Core Architecture
Sumo Logic operates on a multi-tenant, cloud-native architecture designed for massive scale and real-time processing. The platform consists of several key components that work together to provide comprehensive data analytics capabilities.
The data collection layer utilizes lightweight collectors that can be deployed as installed collectors on individual systems or hosted collectors that receive data via HTTP endpoints. These collectors support a wide variety of data sources including log files, metrics, traces, and custom applications through APIs and webhooks.
The data processing engine parses, enriches, and indexes incoming data streams in real time. Sumo Logic's search technology is built for fast, interactive queries over very large data volumes, while machine learning features automatically surface patterns, anomalies, and trends in the data.
Key Features
bash
# Core Platform Capabilities
- Real-time log analytics and search
- Metrics monitoring and alerting
- Security information and event management (SIEM)
- Application performance monitoring (APM)
- Infrastructure monitoring
- Compliance and audit reporting
- Machine learning and predictive analytics
- Custom dashboards and visualizations
Data Collection and Sources
Installed Collectors
bash
# Download and install collector (Linux)
wget https://collectors.sumologic.com/rest/download/linux/64 -O SumoCollector.sh
sudo bash SumoCollector.sh -q -Vsumo.accessid=<ACCESS_ID> -Vsumo.accesskey=<ACCESS_KEY>
# Install as service
sudo /opt/SumoCollector/collector install
sudo /opt/SumoCollector/collector start
# Check collector status
sudo /opt/SumoCollector/collector status
# View collector logs
tail -f /opt/SumoCollector/logs/collector.log
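Collector settings can also be managed after installation through the collector's configuration file, which is read when the collector first starts and registers. A minimal sketch, assuming the default install path; the values shown are illustrative:
bash
# /opt/SumoCollector/config/user.properties (illustrative values)
name=web-server-01
accessid=<ACCESS_ID>
accesskey=<ACCESS_KEY>
# Optional: define sources in a local JSON file
sources=/opt/SumoCollector/config/sources.json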
Hosted Collectors
bash
# Create HTTP source endpoint
curl -X POST https://api.sumologic.com/api/v1/collectors/<COLLECTOR_ID>/sources \
-H "Authorization: Basic <BASE64_CREDENTIALS>" \
-H "Content-Type: application/json" \
-d '{
"source": {
"name": "HTTP Source",
"category": "prod/web/access",
"hostName": "web-server-01",
"sourceType": "HTTP"
}
}'
# Send data to HTTP endpoint
curl -X POST https://endpoint.collection.sumologic.com/receiver/v1/http/<UNIQUE_ID> \
-H "Content-Type: application/json" \
-d '{"timestamp": "2023-01-01T12:00:00Z", "level": "INFO", "message": "Application started"}'
Log File Collection
bash
# Configure local file source
{
"source": {
"name": "Application Logs",
"category": "prod/app/logs",
"pathExpression": "/var/log/myapp/*.log",
"sourceType": "LocalFile",
"multilineProcessingEnabled": true,
"useAutolineMatching": true
}
}
# Configure remote file source
{
"source": {
"name": "Remote Syslog",
"category": "prod/system/syslog",
"protocol": "UDP",
"port": 514,
"sourceType": "Syslog"
}
}
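To verify that the syslog source is receiving data, a test message can be sent with the util-linux logger utility. A quick sketch, assuming <COLLECTOR_HOST> is the machine running the collector:
bash
# Send a test message to the collector's UDP syslog source
logger -n <COLLECTOR_HOST> -P 514 -d "test: syslog source check"
# Then confirm arrival in Sumo Logic:
# _sourceCategory=prod/system/syslog "syslog source check"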
Search Language and Queries
Basic Search Syntax
bash
# Simple keyword search
error
# Field-based search
_sourceCategory=prod/web/access
# Time range search (_messageTime is epoch milliseconds)
_sourceCategory=prod/web/access | where _messageTime > now() - 3600000
# Boolean operators
error AND (database OR connection)
error NOT timeout
(status_code=500 OR status_code=404)
# Wildcard searches
error*
*connection*
user_id=12345*
Advanced Search Operations
bash
# Parse and extract fields
_sourceCategory=prod/web/access
| parse "* * * [*] \"* * *\" * * \"*\" \"*\"" as src_ip, ident, user, timestamp, method, url, protocol, status_code, size, referer, user_agent
# Regular expression parsing
_sourceCategory=prod/app/logs
| parse regex "(?<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) \[(?<level>\w+)\] (?<message>.*)"
# JSON parsing
_sourceCategory=prod/api/logs
| json field=_raw "user_id" as user_id
| json field=_raw "action" as action
| json field=_raw "timestamp" as event_time
# CSV parsing
_sourceCategory=prod/data/csv
| csv _raw extract 1 as user_id, 2 as action, 3 as timestamp
Aggregation and Statistics
bash
# Count operations
_sourceCategory=prod/web/access
| parse "* * * [*] \"* * *\" * *" as src_ip, ident, user, timestamp, method, url, protocol, status_code, size
| count by status_code
# Sum and average
_sourceCategory=prod/web/access
| parse "* * * [*] \"* * *\" * *" as src_ip, ident, user, timestamp, method, url, protocol, status_code, size
| sum(size) as total_bytes, avg(size) as avg_bytes by src_ip
# Timeslice aggregation
_sourceCategory=prod/web/access
| parse "* * * [*] \"* * *\" * *" as src_ip, ident, user, timestamp, method, url, protocol, status_code, size
| timeslice 1m
| count by _timeslice, status_code
# Percentile calculations
_sourceCategory=prod/app/performance
| parse "service=* response_time=*" as service_name, response_time
| pct(response_time, 50, 90, 95, 99) by service_name
Data Transformation
bash
# Field manipulation
_sourceCategory=prod/web/access
| parse "* * * [*] \"* * *\" * *" as src_ip, ident, user, timestamp, method, url, protocol, status_code, size
| if(status_code matches "2*", "success", "error") as result_type
| if(size > 1000000, "large", "normal") as file_size_category
# String operations
_sourceCategory=prod/app/logs
| parse "user=*" as user_id
| toUpperCase(user_id) as user_id_upper
| toLowerCase(user_id) as user_id_lower
| substring(user_id, 0, 3) as user_prefix
# Date and time operations
_sourceCategory=prod/app/logs
| parse "timestamp=*" as event_time
| parseDate(event_time, "yyyy-MM-dd HH:mm:ss") as parsed_time
| formatDate(parsed_time, "yyyy-MM-dd") as date_only
| formatDate(parsed_time, "HH:mm:ss") as time_only
Metrics and Monitoring
Metrics Collection
bash
# Host metrics collection
{
"source": {
"name": "Host Metrics",
"category": "prod/infrastructure/metrics",
"sourceType": "SystemStats",
"interval": 60000,
"hostName": "web-server-01"
}
}
# Custom metrics via HTTP
curl -X POST https://endpoint.collection.sumologic.com/receiver/v1/http/<UNIQUE_ID> \
-H "Content-Type: application/vnd.sumologic.carbon2" \
-d "metric=cpu.usage.percent host=web-01 service=nginx 85.2 1640995200"
# Application metrics
curl -X POST https://endpoint.collection.sumologic.com/receiver/v1/http/<UNIQUE_ID> \
-H "Content-Type: application/vnd.sumologic.prometheus" \
-d "# HELP http_requests_total Total HTTP requests
# TYPE http_requests_total counter
http_requests_total{method=\"GET\",status=\"200\"} 1234
http_requests_total{method=\"POST\",status=\"201\"} 567"
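For ad-hoc testing, a shell loop can emit a Carbon 2.0 data point on an interval. A minimal sketch reusing the endpoint placeholder above; the metric name, tags, and random value are illustrative:
bash
# Emit a data point every 60 seconds (Ctrl-C to stop)
while true; do
  curl -s -X POST https://endpoint.collection.sumologic.com/receiver/v1/http/<UNIQUE_ID> \
    -H "Content-Type: application/vnd.sumologic.carbon2" \
    -d "metric=app.queue.depth host=web-01 $(shuf -i 0-100 -n 1) $(date +%s)"
  sleep 60
done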
Metrics Queries
bash
# Basic metrics query
metric=cpu.usage.percent host=web-01 | avg by host
# Time series aggregation
metric=memory.usage.percent
| avg by host
| timeslice 5m
# Multiple metrics correlation
(metric=cpu.usage.percent OR metric=memory.usage.percent) host=web-01
| avg by metric, host
| timeslice 1m
# Metrics with thresholds
metric=disk.usage.percent
| where %"disk.usage.percent" > 80
| max by host, mount_point
Alerting and Notifications
bash
# Create scheduled search alert
{
"searchName": "High Error Rate Alert",
"searchDescription": "Alert when error rate exceeds 5%",
"searchQuery": "_sourceCategory=prod/web/access | parse \"* * * [*] \\\"* * *\\\" * *\" as src_ip, ident, user, timestamp, method, url, protocol, status_code, size | where status_code matches \"5*\" | count as error_count | if(error_count > 100, \"CRITICAL\", \"OK\") as alert_level | where alert_level = \"CRITICAL\"",
"searchSchedule": {
"cronExpression": "0 */5 * * * ? *",
"displayableTimeRange": "-5m",
"parseableTimeRange": {
"type": "BeginBoundedTimeRange",
"from": {
"type": "RelativeTimeRangeBoundary",
"relativeTime": "-5m"
}
}
},
"searchNotification": {
"taskType": "EmailSearchNotificationSyncDefinition",
"toList": ["admin@company.com"],
"subject": "High Error Rate Detected",
"includeQuery": true,
"includeResultSet": true,
"includeHistogram": true
}
}
Security and SIEM Capabilities
Security Event Analysis
bash
# Failed login detection
_sourceCategory=prod/security/auth
| parse "user=* action=* result=* src_ip=*" as user, action, result, src_ip
| where action = "login" and result = "failed"
| count by user, src_ip
| where _count > 5
# Suspicious network activity
_sourceCategory=prod/network/firewall
| parse "src=* dst=* port=* action=*" as src_ip, dst_ip, dst_port, action
| where action = "blocked"
| count by src_ip, dst_port
| sort by _count desc
# Malware detection
_sourceCategory=prod/security/antivirus
| parse "file=* threat=* action=*" as file_path, threat_name, action
| where action = "quarantined"
| count by threat_name
| sort by _count desc
Threat Intelligence Integration
bash
# IP reputation lookup
_sourceCategory=prod/web/access
| parse "* * * [*] \"* * *\" * *" as src_ip, ident, user, timestamp, method, url, protocol, status_code, size
| lookup type="ip" input="src_ip" output="reputation", "country", "organization"
| where reputation = "malicious"
# Domain reputation analysis
_sourceCategory=prod/dns/logs
| parse "query=* response=*" as domain, ip_address
| lookup type="domain" input="domain" output="category", "reputation"
| where category contains "malware" or reputation = "suspicious"
# File hash analysis
_sourceCategory=prod/security/endpoint
| parse "file_hash=* file_name=*" as file_hash, file_name
| lookup type="hash" input="file_hash" output="malware_family", "first_seen"
| where isNotNull(malware_family)
Compliance and Audit
bash
# PCI DSS compliance monitoring (card data appearing in logs, per hour)
_sourceCategory=prod/payment/logs
| parse "card_number=* transaction_id=* amount=*" as card_number, transaction_id, amount
| timeslice 1h
| count by _timeslice
# GDPR data access logging
_sourceCategory=prod/app/audit
| parse "user=* action=* data_type=* record_id=*" as user, action, data_type, record_id
| where data_type = "personal_data" and action = "access"
| count by user, data_type
# SOX financial controls
_sourceCategory=prod/financial/system
| parse "user=* action=* amount=* approval_status=*" as user, action, amount, approval_status
| where amount > 10000 and approval_status != "approved"
| count by user, action
Dashboards and Visualizations
Dashboard Creation
bash
# Create dashboard via API
curl -X POST https://api.sumologic.com/api/v1/dashboards \
-H "Authorization: Basic <BASE64_CREDENTIALS>" \
-H "Content-Type: application/json" \
-d '{
"title": "Web Application Performance",
"description": "Real-time monitoring of web application metrics",
"folderId": "000000000000000A",
"topologyLabelMap": {
"data": {}
},
"domain": "app",
"panels": [
{
"id": "panel1",
"key": "panel1",
"title": "Request Rate",
"visualSettings": "{\"general\":{\"mode\":\"timeSeries\",\"type\":\"line\"}}",
"keepVisualSettingsConsistentWithParent": true,
"panelType": "SumoSearchPanel",
"queries": [
{
"queryString": "_sourceCategory=prod/web/access | timeslice 1m | count by _timeslice",
"queryType": "Logs",
"queryKey": "A",
"metricsQueryMode": null,
"metricsQueryData": null,
"tracesQueryData": null,
"parseMode": "Manual",
"timeSource": "Message"
}
]
}
]
}'
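The create call returns the new dashboard's id, which can be used to fetch and verify the stored definition. A sketch, assuming <DASHBOARD_ID> comes from that response:
bash
# Retrieve the dashboard created above
curl -X GET https://api.sumologic.com/api/v2/dashboards/<DASHBOARD_ID> \
-H "Authorization: Basic <BASE64_CREDENTIALS>"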
Chart Types and Configurations
bash
# Time series chart
{
"visualSettings": {
"general": {
"mode": "timeSeries",
"type": "line"
},
"series": {
"A": {
"color": "#1f77b4"
}
}
}
}
# Bar chart
{
"visualSettings": {
"general": {
"mode": "distribution",
"type": "bar"
}
}
}
# Pie chart
{
"visualSettings": {
"general": {
"mode": "distribution",
"type": "pie"
}
}
}
# Single value display
{
"visualSettings": {
"general": {
"mode": "singleValue",
"type": "svp"
}
}
}
API Integration and Automation
REST API Authentication
bash
# Generate access credentials
curl -X POST https://api.sumologic.com/api/v1/accessKeys \
-H "Authorization: Basic <BASE64_CREDENTIALS>" \
-H "Content-Type: application/json" \
-d '{
"label": "API Integration Key",
"corsHeaders": ["*"]
}'
# Use access key for authentication
ACCESS_ID="your_access_id"
ACCESS_KEY="your_access_key"
CREDENTIALS=$(echo -n "$ACCESS_ID:$ACCESS_KEY" | base64)
# Test API connection
curl -X GET https://api.sumologic.com/api/v1/collectors \
-H "Authorization: Basic $CREDENTIALS"
Search Job Management
bash
# Create search job
curl -X POST https://api.sumologic.com/api/v1/search/jobs \
-H "Authorization: Basic <BASE64_CREDENTIALS>" \
-H "Content-Type: application/json" \
-d '{
"query": "_sourceCategory=prod/web/access | count by status_code",
"from": "2023-01-01T00:00:00Z",
"to": "2023-01-01T23:59:59Z",
"timeZone": "UTC"
}'
# Check search job status
curl -X GET https://api.sumologic.com/api/v1/search/jobs/<JOB_ID> \
-H "Authorization: Basic <BASE64_CREDENTIALS>"
# Get search results (offset and limit are required)
curl -X GET "https://api.sumologic.com/api/v1/search/jobs/<JOB_ID>/records?offset=0&limit=100" \
-H "Authorization: Basic <BASE64_CREDENTIALS>"
# Delete search job
curl -X DELETE https://api.sumologic.com/api/v1/search/jobs/<JOB_ID> \
-H "Authorization: Basic <BASE64_CREDENTIALS>"
Content Management
bash
# Export content
curl -X POST https://api.sumologic.com/api/v2/content/<CONTENT_ID>/export \
-H "Authorization: Basic <BASE64_CREDENTIALS>" \
-H "Content-Type: application/json" \
-d '{
"isAdminMode": false
}'
# Import content
curl -X POST https://api.sumologic.com/api/v2/content/folders/<FOLDER_ID>/import \
-H "Authorization: Basic <BASE64_CREDENTIALS>" \
-H "Content-Type: application/json" \
-d '{
"content": "<EXPORTED_CONTENT>",
"overwrite": false
}'
# List folder contents
curl -X GET https://api.sumologic.com/api/v2/content/folders/<FOLDER_ID> \
-H "Authorization: Basic <BASE64_CREDENTIALS>"
Performance Optimization
Query Optimization
bash
# Use specific source categories
_sourceCategory=prod/web/access # Good
* # Avoid - searches all data
# Limit time ranges
_sourceCategory=prod/web/access | where _messageTime > now() - 1h # Good
_sourceCategory=prod/web/access # Avoid - searches all time
# Use early filtering (narrow with keyword terms before the first pipe)
_sourceCategory=prod/web/access "500" # Good - keyword filter before parsing
| parse "* * * [*] \"* * *\" * *" as src_ip, ident, user, timestamp, method, url, protocol, status_code, size
| where status_code = "500"
# Less efficient - parse every message, then filter
_sourceCategory=prod/web/access
| parse "* * * [*] \"* * *\" * *" as src_ip, ident, user, timestamp, method, url, protocol, status_code, size
| where status_code = "500"
Data Volume Management
bash
# Monitor data volume
_index=sumologic_volume
| where _sourceCategory matches "*"
| sum(sizeInBytes) as totalBytes by _sourceCategory
| sort by totalBytes desc
# Set up data volume alerts
_index=sumologic_volume
| where _sourceCategory = "prod/web/access"
| sum(sizeInBytes) as dailyBytes
| where dailyBytes > 10000000000 # 10GB threshold
# Optimize collection
{
"source": {
"name": "Optimized Log Source",
"category": "prod/app/logs",
"pathExpression": "/var/log/myapp/*.log",
"sourceType": "LocalFile",
"filters": [
{
"filterType": "Exclude",
"name": "Exclude Debug Logs",
"regexp": ".*DEBUG.*"
}
]
}
}
Troubleshooting and Best Practices
Common Issues
bash
# Check collector endpoint connectivity
curl -v https://endpoint.collection.sumologic.com/receiver/v1/http/<UNIQUE_ID>
# Verify data ingestion
_sourceCategory=<YOUR_CATEGORY>
| count by _sourceHost, _sourceCategory
| sort by _count desc
# Debug parsing issues (nodrop keeps messages that fail to parse)
_sourceCategory=prod/app/logs
| limit 10
| parse "timestamp=*" as event_time nodrop
| where isNull(event_time)
# Monitor search performance
_index=sumologic_search_usage
| where query_user = "your_username"
| avg(scan_bytes), avg(execution_time_ms) by query_user
Security Best Practices
bash
# Implement role-based access control
{
"roleName": "Security Analyst",
"description": "Read-only access to security logs",
"filterPredicate": "_sourceCategory=prod/security/*",
"capabilities": [
"viewCollectors",
"searchAuditIndex"
]
}
# Set up audit logging
_index=sumologic_audit
| where event_name = "SearchQueryExecuted"
| count by user_name, source_ip
| sort by _count desc
# Monitor privileged access
_index=sumologic_audit
| where event_name matches "*Admin*"
| count by user_name, event_name
| sort by _count desc
Performance Monitoring
bash
# Monitor search performance
_index=sumologic_search_usage
| avg(scan_bytes) as avg_scan_bytes, avg(execution_time_ms) as avg_execution_time
| sort by avg_execution_time desc
# Track data ingestion rates
_index=sumologic_volume
| timeslice 1h
| sum(messageCount) as messages_per_hour by _timeslice
| sort by _timeslice desc
# Monitor collector health
_sourceCategory=sumo/collector/health
| parse "status=*" as collector_status
| count by collector_status, _sourceHost
| where collector_status != "healthy"