Skip to content

CodeQL

CodeQL is GitHub’s static analysis tool for finding security vulnerabilities and code quality issues by running semantic queries against a database built from your code.

Installation

macOS

# Homebrew installation (CodeQL is distributed as a cask)
brew install --cask codeql

# Manual download (curl ships with macOS; wget does not)
curl -fLO https://github.com/github/codeql-cli-binaries/releases/download/v2.16.0/codeql-osx64.zip
# The archive holds a single top-level "codeql" directory. Extract it into
# $HOME so that the PATH entry below points at the real launcher.
unzip codeql-osx64.zip -d "$HOME"
export PATH="$PATH:$HOME/codeql"

Linux

# Ubuntu/Debian
wget https://github.com/github/codeql-cli-binaries/releases/download/v2.16.0/codeql-linux64.zip
unzip codeql-linux64.zip
# "codeql" is a directory (the launcher plus its tools), so keep it intact
# under /opt and put that directory on PATH instead of moving it into a bin dir.
sudo mv codeql /opt/codeql
export PATH="$PATH:/opt/codeql"

# Verify installation
codeql --version

Windows

# Download from GitHub releases
# https://github.com/github/codeql-cli-binaries/releases
# Extract and add to PATH

Database Operations

Create CodeQL Database

# Create a database from a local checkout of the source code
codeql database create <database-name> --language=<language> --source-root=<path>

# Example: Create JavaScript database
codeql database create my-db --language=javascript --source-root=./src

# Create multi-language databases. --db-cluster is required when more than one
# language is given; one database per language is created under my-dbs/.
codeql database create my-dbs --db-cluster --language=java,javascript --source-root=./src

# Create database with explicit command
codeql database create my-db \
  --language=python \
  --command="python -m pip install -r requirements.txt" \
  --source-root=./

# C/C++ database. The dbscheme is provided by the language's extractor;
# `database create` has no option for supplying a custom one.
codeql database create my-db \
  --language=cpp \
  --command="make" \
  --source-root=./src

Manage Databases

# List databases. There is no "list" subcommand: a database is a directory
# containing codeql-database.yml, so search for that file instead.
find . -maxdepth 2 -name codeql-database.yml

# Get database info (resolved location, languages, source root)
codeql resolve database --format=json -- <database>

# Compact a database by deleting cached intermediate data
codeql database cleanup <database>

# Remove database
rm -rf -- <database-path>

# Bundle database for sharing
codeql database bundle <database> --output=<bundle.zip>

# Unbundle database: --target is the parent directory, --name is the database
# directory created inside it
codeql database unbundle <bundle.zip> --target=<parent-dir> --name=<database>

Running Queries

Basic Query Execution

# Run single query on database (results are printed as a table)
codeql query run <query.ql> --database=<database>

# Run query suite. `query run` takes a single query; suites go through
# `database run-queries`, which stores the results inside the database.
codeql database run-queries <database> <query-suite.qls>

# Run query and save results as CSV. `query run --output` writes a binary
# BQRS file, which `bqrs decode` converts to a readable format.
codeql query run <query.ql> --database=<database> --output=<results.bqrs>
codeql bqrs decode --format=csv --output=<results.csv> <results.bqrs>

# Decode the same BQRS file as JSON
codeql bqrs decode --format=json --output=<results.json> <results.bqrs>

Built-in Security Queries

# Run the security-and-quality suite for the database's language.
# Add --download if the query pack is not already installed locally.
codeql database analyze <database> \
  codeql/<language>-queries:codeql-suites/<language>-security-and-quality.qls \
  --format=sarif-latest \
  --output=results.sarif

# Analyze with custom queries (a .ql file, a directory, or a .qls suite)
codeql database analyze <database> <path/to/queries> \
  --format=sarif-latest \
  --output=results.sarif

# Run the default queries of one language pack
codeql database analyze <database> codeql/java-queries --format=csv --output=results.csv

# CWE-focused analysis: run only the queries in one CWE directory of the pack
codeql database analyze <database> codeql/javascript-queries:Security/CWE-079 \
  --format=sarif-latest \
  --output=cwe-079.sarif

Language-Specific Commands

JavaScript/TypeScript

# Create JavaScript database
codeql database create js-db --language=javascript --source-root=.

# Run security analysis (--format and --output are both mandatory)
codeql database analyze js-db \
  codeql/javascript-queries:codeql-suites/javascript-security-and-quality.qls \
  --format=sarif-latest \
  --output=js-results.sarif

# Check for SQL injection
codeql query run \
  --database=js-db \
  <path-to-query>/sql-injection.ql

Python

# Create Python database
codeql database create py-db --language=python --source-root=.

# Security analysis (--format and --output are both mandatory)
codeql database analyze py-db \
  codeql/python-queries:codeql-suites/python-security-extended.qls \
  --format=sarif-latest \
  --output=py-results.sarif

# Path traversal detection
codeql query run \
  --database=py-db \
  <path-to-query>/path-injection.ql

Java

# Create Java database with Maven
codeql database create java-db \
  --language=java \
  --command="mvn clean install" \
  --source-root=.

# With Gradle (alternative to the Maven command above; use one or the other)
codeql database create java-db \
  --language=java \
  --command="gradle build" \
  --source-root=.

# Security scanning (--format and --output are both mandatory)
codeql database analyze java-db \
  codeql/java-queries:codeql-suites/java-security-and-quality.qls \
  --format=sarif-latest \
  --output=java-results.sarif

C/C++

# Create C++ database with make
codeql database create cpp-db \
  --language=cpp \
  --command="make" \
  --source-root=.

# With CMake (alternative to the make command above). Configure first, then
# trace only the compile step: keeping --command a single program invocation
# avoids relying on shell operators such as && being interpreted inside the
# option value.
cmake -S . -B build
codeql database create cpp-db \
  --language=cpp \
  --command="cmake --build build" \
  --source-root=.

Query Development

Create Custom Queries

# Write a minimal C/C++ query that selects every call to printf
cat <<'QL' >select-sinks.ql
import cpp

from FunctionCall fc
where fc.getTarget().getName() = "printf"
select fc
QL

# Evaluate the new query against the C++ database
codeql query run --database=cpp-db select-sinks.ql

Query Structure

/**
 * @name Potential SQL injection
 * @description Remote user input reaches an argument of a method named `execute`.
 * @kind path-problem
 * @problem.severity error
 * @id java/example/sql-injection
 */

import java
import semmle.code.java.dataflow.DataFlow
import semmle.code.java.dataflow.FlowSources
import semmle.code.java.dataflow.TaintTracking

/** A data-flow node that is an argument of a call to a method named `execute`. */
class SQLInjectionVulnerability extends DataFlow::Node {
  SQLInjectionVulnerability() {
    exists(MethodCall call |
      call.getMethod().hasName("execute") and
      this.asExpr() = call.getAnArgument()
    )
  }
}

/** Taint configuration: remote input as source, `execute` arguments as sink. */
module SqlInjectionConfig implements DataFlow::ConfigSig {
  predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }

  predicate isSink(DataFlow::Node sink) { sink instanceof SQLInjectionVulnerability }
}

module SqlInjectionFlow = TaintTracking::Global<SqlInjectionConfig>;

// The path graph supplies the edges that a path-problem query needs
import SqlInjectionFlow::PathGraph

from SqlInjectionFlow::PathNode source, SqlInjectionFlow::PathNode sink
where SqlInjectionFlow::flowPath(source, sink)
select sink.getNode(), source, sink, "Potential SQL injection"

Test Queries

# Execute every test found beneath a directory
codeql test run -- <test-dir>

# Same run, with more detailed progress output
codeql test run --verbose -- <test-dir>

# Execute a single test file only
codeql test run -- <test-file.ql>

CI/CD Integration

GitHub Actions

# Runs CodeQL on pushes and pull requests that target main.
name: CodeQL Analysis
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  analyze:
    runs-on: ubuntu-latest
    # Uploading code scanning results needs write access to security events
    permissions:
      security-events: write
      contents: read
      actions: read
    steps:
      - uses: actions/checkout@v4

      # v2 of the CodeQL action has been retired; v3 is the supported line
      - name: Initialize CodeQL
        uses: github/codeql-action/init@v3
        with:
          languages: 'javascript,python'

      # Optional for JavaScript and Python, which are analyzed without a build
      - name: Build
        run: |
          npm install
          npm run build

      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v3

Manual CI Integration

#!/usr/bin/env bash
# Builds a CodeQL database, analyzes it and uploads the SARIF to GitHub.
# Required environment:
#   GITHUB_TOKEN       token allowed to write code scanning results
#   GITHUB_REPOSITORY  owner/name of the target repository
#   GITHUB_REF         full ref being analyzed, e.g. refs/heads/main
#   GITHUB_SHA         commit SHA being analyzed
set -euo pipefail

: "${GITHUB_TOKEN:?must be set}"
: "${GITHUB_REPOSITORY:?must be set}"
: "${GITHUB_REF:?must be set}"
: "${GITHUB_SHA:?must be set}"

# Create database
codeql database create codeql-db --language=javascript --source-root=.

# Run analysis
codeql database analyze codeql-db \
  codeql/javascript-queries:codeql-suites/javascript-security-and-quality.qls \
  --format=sarif-latest \
  --output=results.sarif

# Upload results. The REST endpoint (POST .../code-scanning/sarifs) expects a
# JSON body holding the gzip-compressed, base64-encoded SARIF plus the commit
# and ref, so let the CLI do the encoding. It reads the token from the
# GITHUB_TOKEN environment variable, which keeps it out of the process list.
codeql github upload-results \
  --repository="$GITHUB_REPOSITORY" \
  --ref="$GITHUB_REF" \
  --commit="$GITHUB_SHA" \
  --sarif=results.sarif

Troubleshooting

Common Issues

Database creation fails

# Check language support (lists the extractors this CLI installation can find)
codeql resolve languages

# Verify build command; --verbose prints more detailed progress output
codeql database create db \
  --language=java \
  --command="mvn clean install" \
  --source-root=. \
  --verbose

Query timeout

# Set a timeout of 7200 seconds. No timeout applies by default, and the limit
# is enforced per evaluation stage rather than on the whole run.
codeql query run query.ql --database=db --timeout=7200

Memory issues

# Raise the memory budget of the query evaluator (value in MB).
# Use -J=-Xmx4g instead to pass a heap option straight to the JVM.
codeql database analyze db query-suite \
  --ram=4096 \
  --format=sarif-latest \
  --output=results.sarif

No results found

# Verify database was created correctly (fails if the path is not a database)
codeql resolve database -- <database>

# Check database stats (baseline lines of code that were extracted)
codeql database print-baseline <database>

# Run simple test query
codeql query run tests/test-query.ql --database=db

Advanced Workflows

Taint Tracking Analysis

/**
 * @name User-controlled file name
 * @description Remote user input flows to a path argument of a file system access.
 * @kind path-problem
 * @problem.severity warning
 * @id js/example/user-controlled-file-name
 */

import javascript
// `javascript` already brings DataFlow and TaintTracking into scope; the
// path graph is what a query selecting path nodes additionally needs.
import DataFlow::PathGraph

/** Taint tracking from remote input to file system path arguments. */
class Configuration extends TaintTracking::Configuration {
  Configuration() { this = "UserControlledFileName" }

  override predicate isSource(DataFlow::Node source) {
    source instanceof RemoteFlowSource
  }

  override predicate isSink(DataFlow::Node sink) {
    sink = any(FileSystemAccess fs).getAPathArgument()
  }
}

from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
where cfg.hasFlowPath(source, sink)
// Path-problem column order: alert location, path source, path sink, message
select sink.getNode(), source, sink, "User input flows to filesystem access"

Data Flow Configuration

import java
import semmle.code.java.dataflow.DataFlow
import semmle.code.java.dataflow.FlowSources
import semmle.code.java.dataflow.TaintTracking

/**
 * Taint configuration from remote input to any argument of a call to a
 * method named `exec`. Written against the modular `DataFlow::ConfigSig`
 * API, which replaces the legacy `TaintTracking::Configuration` class for Java.
 */
module MyConfiguration implements DataFlow::ConfigSig {
  predicate isSource(DataFlow::Node n) { n instanceof RemoteFlowSource }

  predicate isSink(DataFlow::Node n) {
    exists(MethodCall m |
      m.getMethod().hasName("exec") and
      n.asExpr() = m.getAnArgument()
    )
  }

  // No extra steps. Using `any()` here would connect every node to every
  // other node, making every source reach every sink.
  predicate isAdditionalFlowStep(DataFlow::Node n1, DataFlow::Node n2) { none() }
}

/** Global taint tracking instantiated with `MyConfiguration`; query it via `MyFlow::flow`. */
module MyFlow = TaintTracking::Global<MyConfiguration>;

Performance Optimization

Database Tuning

# Create database using one extraction thread per CPU core
# (`database create` has no option for choosing a dbscheme)
codeql database create db \
  --language=javascript \
  --source-root=. \
  --threads=0

# Use max threads for analysis (--threads=0 means one thread per core)
codeql database analyze db \
  codeql/javascript-queries:codeql-suites/javascript-security-and-quality.qls \
  --threads=0 \
  --format=sarif-latest \
  --output=results.sarif

Query Optimization

# Run pre-filtering queries
codeql query run prefilter.ql --database=db

# Label the results with a SARIF category so that uploads for different
# languages stay separate. This does not narrow the analysis to changed files.
codeql database analyze db \
  codeql/javascript-queries:codeql-suites/javascript-security-and-quality.qls \
  --threads=4 \
  --sarif-category="javascript" \
  --format=sarif-latest \
  --output=results.sarif

Environment Variables

Variable | Description
CODEQL_HOME | Installation directory
CODEQL_JAVA_TOOL_OPTIONS | JVM options (e.g., -Xmx4g)
GITHUB_TOKEN | GitHub API authentication
CODEQL_THREADS | Number of threads for processing

Best Practices

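  • Store database build commands and pinned CodeQL versions in version control for reproducibility; keep the databases themselves as build artifacts, since they are large generated directories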
  • Use consistent language and query suite versions
  • Include CodeQL in pre-commit hooks for early detection
  • Regularly update CodeQL CLI and query packs
  • Document custom queries with clear comments
  • Test queries against real vulnerabilities
  • Use SARIF output for integration with other tools
  • Run full analysis periodically, not just on changes
  • Monitor false positive rates and tune queries
  • Archive historical results for trend analysis

Resources


Last updated: 2025-03-30