Skip to content

YARA Cheatsheet

YARA is a powerful pattern matching engine designed to help malware researchers identify and classify malware samples. Developed by Victor Alvarez at VirusTotal, YARA allows you to create descriptions of malware families based on textual or binary patterns, making it an essential tool for threat hunting, incident response, and malware analysis.

Installation

Linux Installation

bash
# Ubuntu/Debian
sudo apt update
sudo apt install yara

# CentOS/RHEL
sudo yum install epel-release
sudo yum install yara

# From source
git clone https://github.com/VirusTotal/yara.git
cd yara
./bootstrap.sh
./configure
make
sudo make install

Windows Installation

powershell
# Download from GitHub releases
# https://github.com/VirusTotal/yara/releases

# Using Chocolatey
choco install yara

# Using vcpkg
vcpkg install yara

Python Integration

bash
# Install yara-python
pip install yara-python

# Verify installation
python -c "import yara; print(yara.__version__)"

Basic Syntax and Structure

Rule Structure

yara
rule RuleName
{
    meta:
        description = "Description of what this rule detects"
        author = "Your Name"
        date = "2024-01-01"
        version = "1.0"
        
    strings:
        $string1 = "malicious string"
        $string2 = { 6A 40 68 00 30 00 00 }
        $regex1 = /malware[0-9]{2,4}/
        
    condition:
        $string1 or $string2 or $regex1
}

String Types

yara
rule StringTypes
{
    strings:
        // Text strings
        $text1 = "Hello World"
        $text2 = "Case Insensitive" nocase
        $text3 = "Wide String" wide
        $text4 = "ASCII String" ascii
        
        // Hexadecimal strings
        $hex1 = { 4D 5A }  // MZ header
        $hex2 = { 4D 5A [0-100] 50 45 }  // MZ...PE
        $hex3 = { 4D 5A ?? ?? ?? ?? 50 45 }  // Wildcards
        
        // Regular expressions
        $regex1 = /md5:[a-f0-9]{32}/
        $regex2 = /https?:\/\/[a-zA-Z0-9.-]+/
        
    condition:
        any of them
}

Advanced String Patterns

Wildcards and Jumps

yara
rule WildcardsAndJumps
{
    strings:
        // Single byte wildcard
        $pattern1 = { 4D 5A ?? ?? 50 45 }
        
        // Multiple byte wildcards
        $pattern2 = { 4D 5A ?? ?? ?? ?? 50 45 }
        
        // Variable length jumps
        $pattern3 = { 4D 5A [0-100] 50 45 }
        $pattern4 = { 4D 5A [10-50] 50 45 }
        
        // Alternatives
        $pattern5 = { 4D 5A ( 90 | 91 | 92 ) 50 45 }
        
    condition:
        any of them
}

String Modifiers

yara
rule StringModifiers
{
    strings:
        $case_insensitive = "malware" nocase
        $wide_string = "malware" wide
        $ascii_string = "malware" ascii
        $fullword = "malware" fullword
        $xor_string = "malware" xor
        $base64_string = "malware" base64
        
    condition:
        any of them
}

Conditions and Logic

Basic Conditions

yara
rule BasicConditions
{
    strings:
        $string1 = "malware"
        $string2 = "virus"
        $string3 = "trojan"
        
    condition:
        // NOTE: every line below is a separate example expression. A real
        // rule's condition must be ONE boolean expression, so pick a single
        // line or join several with and/or before compiling this rule.
        
        // Boolean operators
        $string1 and $string2
        $string1 or $string2
        not $string1
        
        // String count (#name is the number of occurrences of $name)
        #string1 > 5
        #string2 == 1
        
        // Any/All of them ("them" means every string defined in the rule)
        any of them
        all of them
        2 of them
        
        // String sets (wildcard or explicit list of string identifiers)
        any of ($string*)
        all of ($string1, $string2)
}

File Properties

yara
rule FileProperties
{
    strings:
        // Placeholder patterns so the position/offset examples below have
        // something to refer to.
        $string1 = "malware"
        $string2 = "virus"

    condition:
        // NOTE: every line below is a separate example expression. A real
        // rule's condition must be ONE boolean expression, so pick a single
        // line or join several with and/or before compiling this rule.

        // File size (KB and MB suffixes are supported)
        filesize > 1MB
        filesize < 10KB

        // Entry point. The bare `entrypoint` keyword is deprecated; prefer
        // pe.entry_point (or elf.entry_point) after importing the module.
        entrypoint >= 0x1000

        // String positions
        $string1 at 0
        $string2 in (0..100)

        // String offsets: @name[i] is the offset of the i-th occurrence.
        // The index is 1-based, so [1] is the first match and [2] the second.
        @string1[1] < 100
        @string2[2] > 1000
}

Modules and Functions

PE Module

yara
import "pe"

rule PEAnalysis
{
    condition:
        // NOTE: every line below is a separate example expression. A real
        // rule's condition must be ONE boolean expression; combine the checks
        // you need with and/or (the two machine checks are alternatives and
        // can never both be true for the same file).
        
        // PE file detection
        pe.is_pe
        
        // Machine type
        pe.machine == pe.MACHINE_I386
        pe.machine == pe.MACHINE_AMD64
        
        // Characteristics (bit flags, tested with bitwise AND)
        pe.characteristics & pe.DLL
        pe.characteristics & pe.EXECUTABLE_IMAGE
        
        // Sections
        pe.number_of_sections > 3
        pe.sections[0].name == ".text"
        pe.sections[0].characteristics & pe.SECTION_CNT_CODE
        
        // Imports (DLL name, function name)
        pe.imports("kernel32.dll", "CreateFileA")
        pe.imports("advapi32.dll", "RegSetValueExA")
        
        // Resources
        pe.number_of_resources > 0
        pe.version_info["CompanyName"] contains "Microsoft"
}

Hash Module

yara
import "hash"
import "pe"  // required for the pe.sections[...] example below

rule HashChecks
{
    condition:
        // NOTE: every line below is a separate example expression. A real
        // rule's condition must be ONE boolean expression, so pick a single
        // line or join several with and/or before compiling this rule.
        // Hash functions return lowercase hex strings, so compare against
        // lowercase digests.

        // MD5 hash
        hash.md5(0, filesize) == "5d41402abc4b2a76b9719d911017c592"

        // SHA1 hash
        hash.sha1(0, filesize) == "aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d"

        // SHA256 hash
        hash.sha256(0, filesize) == "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"

        // Hash of specific sections (offset, size)
        hash.md5(pe.sections[0].raw_data_offset, pe.sections[0].raw_data_size) == "hash_value"
}

Math Module

yara
import "math"
import "pe"  // required for the pe.sections[...] example below

rule MathOperations
{
    condition:
        // NOTE: every line below is a separate example expression. A real
        // rule's condition must be ONE boolean expression, so pick a single
        // line or join several with and/or before compiling this rule.

        // Entropy calculation (offset, size)
        math.entropy(0, filesize) > 7.0
        math.entropy(pe.sections[0].raw_data_offset, pe.sections[0].raw_data_size) > 6.5

        // Mean calculation
        math.mean(0, filesize) > 128

        // Standard deviation (offset, size, mean to measure the deviation from)
        math.deviation(0, filesize, math.mean(0, filesize)) > 50
}

Malware Detection Rules

Generic Malware Patterns

yara
rule Generic_Malware
{
    meta:
        description = "Generic malware detection"
        author = "Security Analyst"
        date = "2024-01-01"
        
    strings:
        $api1 = "CreateRemoteThread"
        $api2 = "WriteProcessMemory"
        $api3 = "VirtualAllocEx"
        $api4 = "SetWindowsHookEx"
        
        $string1 = "backdoor"
        $string2 = "keylogger"
        $string3 = "rootkit"
        
    condition:
        2 of ($api*) and any of ($string*)
}

Ransomware Detection

yara
rule Ransomware_Indicators
{
    meta:
        description = "Ransomware behavior indicators"
        author = "Security Analyst"
        
    strings:
        $encrypt1 = "CryptEncrypt"
        $encrypt2 = "CryptGenKey"
        $encrypt3 = "CryptAcquireContext"
        
        $file_ext1 = ".encrypted"
        $file_ext2 = ".locked"
        $file_ext3 = ".crypto"
        
        $ransom_note1 = "your files have been encrypted"
        $ransom_note2 = "pay bitcoin"
        $ransom_note3 = "decryption key"
        
    condition:
        2 of ($encrypt*) and (any of ($file_ext*) or any of ($ransom_note*))
}

Banking Trojan

yara
rule Banking_Trojan
{
    meta:
        description = "Banking trojan detection"
        
    strings:
        $bank1 = "online banking"
        $bank2 = "credit card"
        $bank3 = "account number"
        
        $hook1 = "SetWindowsHookEx"
        $hook2 = "GetAsyncKeyState"
        $hook3 = "CallNextHookEx"
        
        $network1 = "HttpSendRequest"
        $network2 = "InternetConnect"
        $network3 = "send"
        
    condition:
        any of ($bank*) and any of ($hook*) and any of ($network*)
}

Advanced Techniques

Yara with Python

python
import yara

# Compile rules from file
rules = yara.compile(filepath='malware_rules.yar')

# Compile rules from string
rule_source = '''
rule TestRule {
    strings:
        $test = "malware"
    condition:
        $test
}
'''
rules = yara.compile(source=rule_source)

# Scan file
matches = rules.match('/path/to/file')
for match in matches:
    print(f"Rule: {match.rule}")
    print(f"Tags: {match.tags}")
    print(f"Meta: {match.meta}")
    print(f"Strings: {match.strings}")

# Scan data
with open('/path/to/file', 'rb') as f:
    data = f.read()
    matches = rules.match(data=data)

# Custom callback
def callback(data):
    # `data` is a dict describing one rule (e.g. its 'rule' name, 'tags',
    # 'meta', 'strings' and a boolean 'matches' flag).
    print(f"Match found: {data}")
    # Returning CALLBACK_CONTINUE moves on to the next rule;
    # CALLBACK_ABORT would stop the scan.
    return yara.CALLBACK_CONTINUE

# By default the callback is invoked for EVERY rule, matching or not.
# Restrict it to matching rules so "Match found" is only printed for real hits.
rules.match('/path/to/file', callback=callback,
            which_callbacks=yara.CALLBACK_MATCHES)

Performance Optimization

yara
rule OptimizedRule
{
    meta:
        description = "Performance optimized rule"
        
    strings:
        // Use specific strings first
        $specific = { 4D 5A 90 00 03 00 00 00 }
        
        // Avoid expensive regex
        $simple_string = "malware" nocase
        
        // Use fullword for exact matches
        $exact = "CreateFile" fullword
        
    condition:
        // Check file size first
        filesize > 1KB and filesize < 10MB and
        
        // Check specific patterns first
        $specific at 0 and
        
        // Then check other strings
        ($simple_string or $exact)
}

Multi-stage Detection

yara
rule Multi_Stage_Malware
{
    meta:
        description = "Multi-stage malware detection"
        
    strings:
        // Stage 1: Dropper
        $stage1_1 = "temp.exe"
        $stage1_2 = "CreateProcess"
        
        // Stage 2: Payload
        $stage2_1 = "payload.dll"
        $stage2_2 = "LoadLibrary"
        
        // Stage 3: Persistence
        $stage3_1 = "SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Run"
        $stage3_2 = "RegSetValueEx"
        
    condition:
        (any of ($stage1_*) and any of ($stage2_*)) or
        (any of ($stage2_*) and any of ($stage3_*))
}

Command Line Usage

Basic Commands

bash
# Scan single file
yara rules.yar /path/to/file

# Scan directory recursively
yara -r rules.yar /path/to/directory

# Scan with multiple rule files
yara rules1.yar rules2.yar /path/to/file

# Output matching strings
yara -s rules.yar /path/to/file

# Output metadata
yara -m rules.yar /path/to/file

# Set timeout
yara -a 60 rules.yar /path/to/file

# Disable warnings
yara -w rules.yar /path/to/file

Advanced Options

bash
# Define external variables
yara -d filename="malware.exe" rules.yar /path/to/file

# Print the tags of matching rules
yara -g rules.yar /path/to/file

# Report only rules that carry a given tag
yara -t TAG rules.yar /path/to/file

# Print only the rules that did NOT match (negate)
yara -n rules.yar /path/to/file

# Count matches
yara -c rules.yar /path/to/file

# Fast matching mode (--fast-scan)
yara -f rules.yar /path/to/file

# Scan process memory: pass the process ID as the target
# (usually requires root/administrator privileges)
yara rules.yar PID

# Maximum number of strings per rule
yara --max-strings-per-rule=1000 rules.yar /path/to/file

# Abort scanning after this many rules have matched
yara -l 1000 rules.yar /path/to/file

Integration Examples

Volatility Integration

python
# Volatility plugin example
import volatility.plugins.common as common
import volatility.utils as utils          # provides load_as()
import volatility.win32.tasks as tasks    # provides pslist()
import yara

class YaraScan(common.AbstractWindowsCommand):
    """Volatility 2 plugin sketch: scan the first page of each process image with YARA."""

    def __init__(self, config, *args, **kwargs):
        common.AbstractWindowsCommand.__init__(self, config, *args, **kwargs)
        # Compile once per plugin instance; the path is relative to the
        # current working directory.
        self.rules = yara.compile(filepath='malware_rules.yar')
    
    def calculate(self):
        """Yield (process, matches) for every process whose image header matches a rule."""
        addr_space = utils.load_as(self._config)
        for proc in tasks.pslist(addr_space):
            proc_space = proc.get_process_address_space()
            if not proc_space:
                continue
            # Read the first 0x1000 bytes at the image base address.
            data = proc_space.read(proc.Peb.ImageBaseAddress, 0x1000)
            if not data:
                # Nothing readable at that address (e.g. paged out);
                # rules.match(data=None) would raise, so skip this process.
                continue
            matches = self.rules.match(data=data)
            if matches:
                yield proc, matches

Cuckoo Sandbox Integration

python
# Cuckoo processing module
from lib.cuckoo.common.abstracts import Processing
import yara

class YaraProcessing(Processing):
    def run(self):
        self.key = "yara"
        results = []
        
        rules = yara.compile(filepath='rules/malware.yar')
        
        # Scan dropped files
        for file_path in self.results.get("dropped", []):
            matches = rules.match(file_path["path"])
            if matches:
                results.append({
                    "file": file_path["path"],
                    "matches": [match.rule for match in matches]
                })
        
        return results

Best Practices

Rule Writing Guidelines

yara
rule BestPracticeExample
{
    meta:
        description = "Example of best practices"
        author = "Security Team"
        date = "2024-01-01"
        version = "1.0"
        reference = "https://example.com/analysis"
        hash = "md5hash"
        
    strings:
        // Use descriptive variable names
        $mz_header = { 4D 5A }
        $pe_header = { 50 45 00 00 }
        
        // Group related strings
        $api_process = "CreateProcess"
        $api_thread = "CreateThread"
        $api_file = "CreateFile"
        
        // Use specific patterns
        $specific_string = "unique_malware_string"
        
    condition:
        // Check file type first
        $mz_header at 0 and
        $pe_header and
        
        // Then check specific indicators
        $specific_string and
        2 of ($api_*)
}

Performance Considerations

yara
rule PerformanceOptimized
{
    strings:
        // Position constraints such as "at 0" are not allowed in the strings
        // section; the anchoring is done in the condition below.
        $header = { 4D 5A }

        // Use fullword for API names
        $api = "CreateFile" fullword

        // Avoid overly broad regex
        $url = /https?:\/\/[a-zA-Z0-9.-]{5,50}\.[a-z]{2,4}/

    condition:
        // Check file size constraints
        filesize > 1KB and filesize < 5MB and

        // Anchor the header to the start of the file (specific check first)
        $header at 0 and

        // Then broader checks
        ($api or $url)
}

Error Handling

python
import yara

# Stays None when compilation fails, so the scan below is skipped instead of
# failing with a NameError on an unbound `rules`.
rules = None
try:
    # Compile rules with error handling
    rules = yara.compile(filepath='rules.yar')
except yara.SyntaxError as e:
    # SyntaxError is a subclass of yara.Error, so it must be caught first.
    print(f"Syntax error in rules: {e}")
except yara.Error as e:
    print(f"YARA error: {e}")

if rules is not None:
    try:
        # Scan with timeout (seconds)
        matches = rules.match('/path/to/file', timeout=60)
    except yara.TimeoutError:
        print("Scan timed out")
    except yara.Error as e:
        print(f"Scan error: {e}")

Troubleshooting

Common Issues

bash
# Rule compilation errors
yarac rules.yar /dev/null  # Compile only; prints syntax errors and warnings

# Slow scans of large files
yara -f rules.yar /large/file  # Fast matching mode

# Very large files during a directory scan (YARA 4.2+)
yara -z 104857600 -r rules.yar /path/to/directory  # Skip files larger than 100 MB

# Timeout issues
yara -a 300 rules.yar /path/to/file  # Increase timeout

# String encoding issues
# Use 'wide' modifier for UTF-16 (two bytes per character) strings
# Use 'ascii' modifier for ASCII strings
# Use 'ascii wide' together to match both encodings

# Performance issues
# Use more specific conditions
# Avoid expensive regex patterns
# Use file size constraints

Debugging Rules

yara
rule DebugRule
{
    meta:
        description = "Debug rule with verbose conditions"

    strings:
        $debug1 = "test string"
        $debug2 = { 41 42 43 }

    condition:
        // Add file size check for debugging
        filesize > 0 and

        // Check each string individually. The outer parentheses matter:
        // "and" binds tighter than "or", so without them the filesize check
        // would only guard the first alternative.
        (
            ($debug1 and #debug1 > 0) or
            ($debug2 and #debug2 > 0)
        )
}

Resources