Skip to content

Velero

Velero is an open-source tool for backing up and restoring Kubernetes cluster resources and persistent volumes. It supports scheduled backups, on-demand snapshots, cross-cluster migrations, and integrates with major cloud storage providers.

Installation

Install Velero CLI

# macOS
brew install velero

# Linux — download latest release
VERSION=$(curl -s https://api.github.com/repos/vmware-tanzu/velero/releases/latest | jq -r .tag_name)
curl -L "https://github.com/vmware-tanzu/velero/releases/download/${VERSION}/velero-${VERSION}-linux-amd64.tar.gz" | \
  tar xz -C /tmp
sudo mv /tmp/velero-${VERSION}-linux-amd64/velero /usr/local/bin/

# Verify
velero version

Install on AWS (S3 + EBS Snapshots)

# Create S3 bucket
aws s3 mb s3://my-velero-backups --region us-east-1

# Create credentials file
cat > /tmp/credentials-velero <<EOF
[default]
aws_access_key_id=<YOUR_ACCESS_KEY>
aws_secret_access_key=<YOUR_SECRET_KEY>
EOF

# Install Velero with AWS plugin
velero install \
  --provider aws \
  --plugins velero/velero-plugin-for-aws:v1.10.0 \
  --bucket my-velero-backups \
  --secret-file /tmp/credentials-velero \
  --backup-location-config region=us-east-1 \
  --snapshot-location-config region=us-east-1 \
  --use-node-agent                  # Enable file-system backup (formerly restic)

Install on GCP (GCS + GCE Persistent Disk)

# Install with GCP plugin
velero install \
  --provider gcp \
  --plugins velero/velero-plugin-for-gcp:v1.10.0 \
  --bucket my-velero-gcs-bucket \
  --secret-file /tmp/credentials-velero \
  --backup-location-config serviceAccount=velero@my-project.iam.gserviceaccount.com

# Verify installation
kubectl get all -n velero
velero backup-location get

Install with Helm

helm repo add vmware-tanzu https://vmware-tanzu.github.io/helm-charts
helm repo update

helm install velero vmware-tanzu/velero \
  --namespace velero \
  --create-namespace \
  --set-file credentials.secretContents.cloud=/tmp/credentials-velero \
  --set configuration.backupStorageLocation[0].name=default \
  --set configuration.backupStorageLocation[0].provider=aws \
  --set configuration.backupStorageLocation[0].bucket=my-velero-backups \
  --set configuration.backupStorageLocation[0].config.region=us-east-1 \
  --set initContainers[0].name=velero-plugin-for-aws \
  --set initContainers[0].image=velero/velero-plugin-for-aws:v1.10.0 \
  --set initContainers[0].volumeMounts[0].mountPath=/target \
  --set initContainers[0].volumeMounts[0].name=plugins \
  --set nodeAgent.enabled=true

Configuration

BackupStorageLocation

apiVersion: velero.io/v1
kind: BackupStorageLocation
metadata:
  name: secondary-backup
  namespace: velero
spec:
  provider: aws
  objectStorage:
    bucket: my-secondary-bucket
    prefix: cluster-name/
  config:
    region: eu-west-1
    s3ForcePathStyle: "false"
  credential:
    name: velero-credentials
    key: cloud
  default: false
  accessMode: ReadWrite

VolumeSnapshotLocation

apiVersion: velero.io/v1
kind: VolumeSnapshotLocation
metadata:
  name: aws-ebs
  namespace: velero
spec:
  provider: aws
  config:
    region: us-east-1

Core Commands

| Command | Description |
| --- | --- |
| velero backup create <name> | Create an on-demand backup |
| velero backup get | List all backups |
| velero backup describe <name> | Show backup details |
| velero backup logs <name> | View backup logs |
| velero backup delete <name> | Delete a backup |
| velero backup download <name> | Download backup tarball |
| velero restore create --from-backup <name> | Restore from a backup |
| velero restore get | List all restores |
| velero restore describe <name> | Show restore details |
| velero restore logs <name> | View restore logs |
| velero restore delete <name> | Delete a restore record |
| velero schedule create <name> | Create a backup schedule |
| velero schedule get | List all schedules |
| velero schedule delete <name> | Delete a schedule |
| velero schedule pause <name> | Pause a schedule |
| velero schedule unpause <name> | Resume a paused schedule |
| velero backup-location get | List backup storage locations |
| velero snapshot-location get | List snapshot locations |
| velero plugin get | List installed plugins |
| velero version | Show CLI and server versions |
| velero debug | Collect debug information |

Advanced Usage

Namespace-Scoped Backup

# Back up specific namespaces
velero backup create production-backup \
  --include-namespaces production,staging \
  --snapshot-volumes \
  --wait

# Exclude specific resources
velero backup create full-backup \
  --exclude-namespaces kube-system,velero \
  --exclude-resources endpoints,events \
  --wait

Label-Selector Backup

# Back up only resources with specific labels
velero backup create app-backup \
  --selector app=myapp,environment=production \
  --include-namespaces production \
  --wait

Scheduled Backups

# Daily backup at 2am, retain for 30 days
velero schedule create daily-backup \
  --schedule="0 2 * * *" \
  --ttl 720h \
  --include-namespaces production,staging

# Weekly full-cluster backup
velero schedule create weekly-full \
  --schedule="0 1 * * 0" \
  --ttl 2160h

# List schedules
velero schedule get

File System Backup (Node Agent / Kopia)

# Opt-in volumes for file-system backup per pod via annotation
kubectl annotate pod -n production my-pod \
  backup.velero.io/backup-volumes=data-volume,config-volume

# Or opt-out specific volumes
kubectl annotate pod -n production my-pod \
  backup.velero.io/backup-volumes-excludes=cache-volume

# Check node-agent pod status
kubectl get pods -n velero -l name=node-agent

Cross-Cluster Migration

# On source cluster — create backup
velero backup create migration-backup \
  --include-namespaces production \
  --snapshot-volumes \
  --wait

# Verify backup is in storage
velero backup describe migration-backup

# On target cluster — install Velero pointing to same storage bucket
# then restore
velero restore create \
  --from-backup migration-backup \
  --namespace-mappings production:production-new \
  --wait

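# Restore including persistent volumes. Note: this flag alone does not remap
# storage classes; to map old storage classes to new ones, first create a
# ConfigMap in the velero namespace labeled
# velero.io/change-storage-class: RestoreItemAction with old:new entries.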
velero restore create \
  --from-backup migration-backup \
  --restore-volumes=true

Restore to Different Namespace

velero restore create restore-to-staging \
  --from-backup production-backup \
  --namespace-mappings production:staging \
  --restore-volumes \
  --wait

velero restore describe restore-to-staging

Backup Hooks

# Pre/post backup hooks for consistent application state
apiVersion: velero.io/v1
kind: Backup
metadata:
  name: consistent-backup
  namespace: velero
spec:
  includedNamespaces:
    - production
  hooks:
    resources:
      - name: postgres-hook
        includedNamespaces:
          - production
        labelSelector:
          matchLabels:
            app: postgres
        pre:
          - exec:
              container: postgres
              command:
                - /bin/bash
                - -c
                - psql -U postgres -c "CHECKPOINT;"
              onError: Fail
              timeout: 30s
        post:
          - exec:
              container: postgres
              command:
                - /bin/bash
                - -c
                - echo "Backup complete"
              onError: Continue
              timeout: 10s

Common Workflows

Verify Backup Integrity

# Check backup phase
velero backup get
# PHASE: Completed = success, PartiallyFailed = check logs

# Inspect backup contents
velero backup describe my-backup --details | head -100

# Download and inspect the backup tarball
velero backup download my-backup -o /tmp/my-backup.tar.gz
tar tzf /tmp/my-backup.tar.gz | head -50

Disaster Recovery Runbook

# 1. List available backups
velero backup get

# 2. Choose most recent completed backup
BACKUP_NAME=daily-backup-20260516000000

# 3. Create restore
velero restore create dr-restore-$(date +%Y%m%d) \
  --from-backup ${BACKUP_NAME} \
  --restore-volumes \
  --wait

# 4. Monitor restore
velero restore describe dr-restore-$(date +%Y%m%d) --details

# 5. Verify workloads
kubectl get pods -A
kubectl get pvc -A

Monitor Backup Success in CI

# Check that the most recently created backup (scheduled or on-demand) succeeded
LAST_BACKUP=$(velero backup get --output json | \
  jq -r '.items | sort_by(.metadata.creationTimestamp) | last | .metadata.name')

STATUS=$(velero backup describe ${LAST_BACKUP} --output json | \
  jq -r '.status.phase')

if [ "$STATUS" != "Completed" ]; then
  echo "Backup ${LAST_BACKUP} failed with status: ${STATUS}"
  exit 1
fi
echo "Backup ${LAST_BACKUP} succeeded"

Tips and Best Practices

  • Test restores regularly — a backup that has never been restored is an untested backup; schedule quarterly restore drills.
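  • Set --ttl explicitly on backups — when omitted, Velero applies its default TTL (720h, i.e. 30 days); choose a retention that matches your policy so backups neither expire too early nor inflate storage costs.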
  • Enable --snapshot-volumes for stateful apps — file-system copies miss in-flight writes; volume snapshots are crash-consistent.
  • Use backup hooks for databases — flush WAL, quiesce writes, or run pg_dump/mysqldump before the snapshot for application-consistent backups.
  • Store backups in a separate cloud account — if your cluster account is compromised, backups in the same account are at risk.
  • Use --namespace-mappings for staging clones — easily spin up production data in a staging namespace for testing without modifying production.
  • Enable node-agent (kopia) for PVCs without snapshot support — useful for NFS, local storage, or any provider without a CSI snapshot driver.
  • Set accessMode: ReadOnly on secondary BSLs — prevents accidental writes to archive locations.
  • Back up before cluster upgrades — always take a manual backup before upgrading Kubernetes or critical operators.
  • Monitor backup durations — sudden increases in backup time can indicate growing PVC sizes or network issues with your storage provider.