Add support for zstd compression (#249)

Co-authored-by: Michal Middleton <jafa81@gmail.com>
This commit is contained in:
Michal Middleton 2023-08-19 12:20:13 -05:00 committed by GitHub
parent 1765b06835
commit 67e7288855
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 195 additions and 74 deletions

View File

@ -148,13 +148,22 @@ You can populate below template according to your requirements and use it as you
# BACKUP_CRON_EXPRESSION="0 2 * * *"
# The name of the backup file including the `.tar.gz` extension.
# The compression algorithm used in conjunction with tar.
# Valid options are: "gz" (Gzip) and "zst" (Zstd).
# Note that the selection affects the file extension.
# BACKUP_COMPRESSION="gz"
# The name of the backup file including the extension.
# Format verbs will be replaced as in `strftime`. Omitting them
# will result in the same filename for every backup run, which means previous
# versions will be overwritten on subsequent runs. The default results
# in filenames like `backup-2021-08-29T04-00-00.tar.gz`.
# versions will be overwritten on subsequent runs.
# Extension can be defined literally or via "{{ .Extension }}" template,
# in which case it will become either "tar.gz" or "tar.zst" (depending
# on your BACKUP_COMPRESSION setting).
# The default results in filenames like: `backup-2021-08-29T04-00-00.tar.gz`.
# BACKUP_FILENAME="backup-%Y-%m-%dT%H-%M-%S.tar.gz"
# BACKUP_FILENAME="backup-%Y-%m-%dT%H-%M-%S.{{ .Extension }}"
# Setting BACKUP_FILENAME_EXPAND to true allows for environment variable
# placeholders in BACKUP_FILENAME, BACKUP_LATEST_SYMLINK and in

View File

@ -15,9 +15,11 @@ import (
"path"
"path/filepath"
"strings"
"github.com/klauspost/compress/zstd"
)
func createArchive(files []string, inputFilePath, outputFilePath string) error {
func createArchive(files []string, inputFilePath, outputFilePath string, compression string) error {
inputFilePath = stripTrailingSlashes(inputFilePath)
inputFilePath, outputFilePath, err := makeAbsolute(inputFilePath, outputFilePath)
if err != nil {
@ -27,7 +29,7 @@ func createArchive(files []string, inputFilePath, outputFilePath string) error {
return fmt.Errorf("createArchive: error creating output file path: %w", err)
}
if err := compress(files, outputFilePath, filepath.Dir(inputFilePath)); err != nil {
if err := compress(files, outputFilePath, filepath.Dir(inputFilePath), compression); err != nil {
return fmt.Errorf("createArchive: error creating archive: %w", err)
}
@ -51,18 +53,30 @@ func makeAbsolute(inputFilePath, outputFilePath string) (string, string, error)
return inputFilePath, outputFilePath, err
}
func compress(paths []string, outFilePath, subPath string) error {
func compress(paths []string, outFilePath, subPath string, algo string) error {
file, err := os.Create(outFilePath)
var compressWriter io.WriteCloser
if err != nil {
return fmt.Errorf("compress: error creating out file: %w", err)
}
prefix := path.Dir(outFilePath)
gzipWriter := gzip.NewWriter(file)
tarWriter := tar.NewWriter(gzipWriter)
switch algo {
case "gz":
compressWriter = gzip.NewWriter(file)
case "zst":
compressWriter, err = zstd.NewWriter(file)
if err != nil {
return fmt.Errorf("compress: zstd error: %w", err)
}
default:
return fmt.Errorf("compress: unsupported compression algorithm: %s", algo)
}
tarWriter := tar.NewWriter(compressWriter)
for _, p := range paths {
if err := writeTarGz(p, tarWriter, prefix); err != nil {
if err := writeTarball(p, tarWriter, prefix); err != nil {
return fmt.Errorf("compress: error writing %s to archive: %w", p, err)
}
}
@ -72,9 +86,9 @@ func compress(paths []string, outFilePath, subPath string) error {
return fmt.Errorf("compress: error closing tar writer: %w", err)
}
err = gzipWriter.Close()
err = compressWriter.Close()
if err != nil {
return fmt.Errorf("compress: error closing gzip writer: %w", err)
return fmt.Errorf("compress: error closing compression writer: %w", err)
}
err = file.Close()
@ -85,10 +99,10 @@ func compress(paths []string, outFilePath, subPath string) error {
return nil
}
func writeTarGz(path string, tarWriter *tar.Writer, prefix string) error {
func writeTarball(path string, tarWriter *tar.Writer, prefix string) error {
fileInfo, err := os.Lstat(path)
if err != nil {
return fmt.Errorf("writeTarGz: error getting file infor for %s: %w", path, err)
return fmt.Errorf("writeTarball: error getting file infor for %s: %w", path, err)
}
if fileInfo.Mode()&os.ModeSocket == os.ModeSocket {
@ -99,19 +113,19 @@ func writeTarGz(path string, tarWriter *tar.Writer, prefix string) error {
if fileInfo.Mode()&os.ModeSymlink == os.ModeSymlink {
var err error
if link, err = os.Readlink(path); err != nil {
return fmt.Errorf("writeTarGz: error resolving symlink %s: %w", path, err)
return fmt.Errorf("writeTarball: error resolving symlink %s: %w", path, err)
}
}
header, err := tar.FileInfoHeader(fileInfo, link)
if err != nil {
return fmt.Errorf("writeTarGz: error getting file info header: %w", err)
return fmt.Errorf("writeTarball: error getting file info header: %w", err)
}
header.Name = strings.TrimPrefix(path, prefix)
err = tarWriter.WriteHeader(header)
if err != nil {
return fmt.Errorf("writeTarGz: error writing file info header: %w", err)
return fmt.Errorf("writeTarball: error writing file info header: %w", err)
}
if !fileInfo.Mode().IsRegular() {
@ -120,13 +134,13 @@ func writeTarGz(path string, tarWriter *tar.Writer, prefix string) error {
file, err := os.Open(path)
if err != nil {
return fmt.Errorf("writeTarGz: error opening %s: %w", path, err)
return fmt.Errorf("writeTarball: error opening %s: %w", path, err)
}
defer file.Close()
_, err = io.Copy(tarWriter, file)
if err != nil {
return fmt.Errorf("writeTarGz: error copying %s to tar writer: %w", path, err)
return fmt.Errorf("writeTarball: error copying %s to tar writer: %w", path, err)
}
return nil

View File

@ -16,59 +16,60 @@ import (
// Config holds all configuration values that are expected to be set
// by users.
type Config struct {
AwsS3BucketName string `split_words:"true"`
AwsS3Path string `split_words:"true"`
AwsEndpoint string `split_words:"true" default:"s3.amazonaws.com"`
AwsEndpointProto string `split_words:"true" default:"https"`
AwsEndpointInsecure bool `split_words:"true"`
AwsEndpointCACert CertDecoder `envconfig:"AWS_ENDPOINT_CA_CERT"`
AwsStorageClass string `split_words:"true"`
AwsAccessKeyID string `envconfig:"AWS_ACCESS_KEY_ID"`
AwsAccessKeyIDFile string `envconfig:"AWS_ACCESS_KEY_ID_FILE"`
AwsSecretAccessKey string `split_words:"true"`
AwsSecretAccessKeyFile string `split_words:"true"`
AwsIamRoleEndpoint string `split_words:"true"`
AwsPartSize int64 `split_words:"true"`
BackupSources string `split_words:"true" default:"/backup"`
BackupFilename string `split_words:"true" default:"backup-%Y-%m-%dT%H-%M-%S.tar.gz"`
BackupFilenameExpand bool `split_words:"true"`
BackupLatestSymlink string `split_words:"true"`
BackupArchive string `split_words:"true" default:"/archive"`
BackupRetentionDays int32 `split_words:"true" default:"-1"`
BackupPruningLeeway time.Duration `split_words:"true" default:"1m"`
BackupPruningPrefix string `split_words:"true"`
BackupStopContainerLabel string `split_words:"true" default:"true"`
BackupFromSnapshot bool `split_words:"true"`
BackupExcludeRegexp RegexpDecoder `split_words:"true"`
GpgPassphrase string `split_words:"true"`
NotificationURLs []string `envconfig:"NOTIFICATION_URLS"`
NotificationLevel string `split_words:"true" default:"error"`
EmailNotificationRecipient string `split_words:"true"`
EmailNotificationSender string `split_words:"true" default:"noreply@nohost"`
EmailSMTPHost string `envconfig:"EMAIL_SMTP_HOST"`
EmailSMTPPort int `envconfig:"EMAIL_SMTP_PORT" default:"587"`
EmailSMTPUsername string `envconfig:"EMAIL_SMTP_USERNAME"`
EmailSMTPPassword string `envconfig:"EMAIL_SMTP_PASSWORD"`
WebdavUrl string `split_words:"true"`
WebdavUrlInsecure bool `split_words:"true"`
WebdavPath string `split_words:"true" default:"/"`
WebdavUsername string `split_words:"true"`
WebdavPassword string `split_words:"true"`
SSHHostName string `split_words:"true"`
SSHPort string `split_words:"true" default:"22"`
SSHUser string `split_words:"true"`
SSHPassword string `split_words:"true"`
SSHIdentityFile string `split_words:"true" default:"/root/.ssh/id_rsa"`
SSHIdentityPassphrase string `split_words:"true"`
SSHRemotePath string `split_words:"true"`
ExecLabel string `split_words:"true"`
ExecForwardOutput bool `split_words:"true"`
LockTimeout time.Duration `split_words:"true" default:"60m"`
AzureStorageAccountName string `split_words:"true"`
AzureStoragePrimaryAccountKey string `split_words:"true"`
AzureStorageContainerName string `split_words:"true"`
AzureStoragePath string `split_words:"true"`
AzureStorageEndpoint string `split_words:"true" default:"https://{{ .AccountName }}.blob.core.windows.net/"`
AwsS3BucketName string `split_words:"true"`
AwsS3Path string `split_words:"true"`
AwsEndpoint string `split_words:"true" default:"s3.amazonaws.com"`
AwsEndpointProto string `split_words:"true" default:"https"`
AwsEndpointInsecure bool `split_words:"true"`
AwsEndpointCACert CertDecoder `envconfig:"AWS_ENDPOINT_CA_CERT"`
AwsStorageClass string `split_words:"true"`
AwsAccessKeyID string `envconfig:"AWS_ACCESS_KEY_ID"`
AwsAccessKeyIDFile string `envconfig:"AWS_ACCESS_KEY_ID_FILE"`
AwsSecretAccessKey string `split_words:"true"`
AwsSecretAccessKeyFile string `split_words:"true"`
AwsIamRoleEndpoint string `split_words:"true"`
AwsPartSize int64 `split_words:"true"`
BackupCompression CompressionType `split_words:"true" default:"gz"`
BackupSources string `split_words:"true" default:"/backup"`
BackupFilename string `split_words:"true" default:"backup-%Y-%m-%dT%H-%M-%S.{{ .Extension }}"`
BackupFilenameExpand bool `split_words:"true"`
BackupLatestSymlink string `split_words:"true"`
BackupArchive string `split_words:"true" default:"/archive"`
BackupRetentionDays int32 `split_words:"true" default:"-1"`
BackupPruningLeeway time.Duration `split_words:"true" default:"1m"`
BackupPruningPrefix string `split_words:"true"`
BackupStopContainerLabel string `split_words:"true" default:"true"`
BackupFromSnapshot bool `split_words:"true"`
BackupExcludeRegexp RegexpDecoder `split_words:"true"`
GpgPassphrase string `split_words:"true"`
NotificationURLs []string `envconfig:"NOTIFICATION_URLS"`
NotificationLevel string `split_words:"true" default:"error"`
EmailNotificationRecipient string `split_words:"true"`
EmailNotificationSender string `split_words:"true" default:"noreply@nohost"`
EmailSMTPHost string `envconfig:"EMAIL_SMTP_HOST"`
EmailSMTPPort int `envconfig:"EMAIL_SMTP_PORT" default:"587"`
EmailSMTPUsername string `envconfig:"EMAIL_SMTP_USERNAME"`
EmailSMTPPassword string `envconfig:"EMAIL_SMTP_PASSWORD"`
WebdavUrl string `split_words:"true"`
WebdavUrlInsecure bool `split_words:"true"`
WebdavPath string `split_words:"true" default:"/"`
WebdavUsername string `split_words:"true"`
WebdavPassword string `split_words:"true"`
SSHHostName string `split_words:"true"`
SSHPort string `split_words:"true" default:"22"`
SSHUser string `split_words:"true"`
SSHPassword string `split_words:"true"`
SSHIdentityFile string `split_words:"true" default:"/root/.ssh/id_rsa"`
SSHIdentityPassphrase string `split_words:"true"`
SSHRemotePath string `split_words:"true"`
ExecLabel string `split_words:"true"`
ExecForwardOutput bool `split_words:"true"`
LockTimeout time.Duration `split_words:"true" default:"60m"`
AzureStorageAccountName string `split_words:"true"`
AzureStoragePrimaryAccountKey string `split_words:"true"`
AzureStorageContainerName string `split_words:"true"`
AzureStoragePath string `split_words:"true"`
AzureStorageEndpoint string `split_words:"true" default:"https://{{ .AccountName }}.blob.core.windows.net/"`
}
func (c *Config) resolveSecret(envVar string, secretPath string) (string, error) {
@ -82,6 +83,22 @@ func (c *Config) resolveSecret(envVar string, secretPath string) (string, error)
return string(data), nil
}
type CompressionType string
func (c *CompressionType) Decode(v string) error {
switch v {
case "gz", "zst":
*c = CompressionType(v)
return nil
default:
return fmt.Errorf("config: error decoding compression type %s", v)
}
}
func (c *CompressionType) String() string {
return string(*c)
}
type CertDecoder struct {
Cert *x509.Certificate
}

View File

@ -4,6 +4,7 @@
package main
import (
"bytes"
"context"
"errors"
"fmt"
@ -89,6 +90,20 @@ func newScript() (*script, error) {
}
s.file = path.Join("/tmp", s.c.BackupFilename)
tmplFileName, tErr := template.New("extension").Parse(s.file)
if tErr != nil {
return nil, fmt.Errorf("newScript: unable to parse backup file extension template: %w", tErr)
}
var bf bytes.Buffer
if tErr := tmplFileName.Execute(&bf, map[string]string{
"Extension": fmt.Sprintf("tar.%s", s.c.BackupCompression),
}); tErr != nil {
return nil, fmt.Errorf("newScript: error executing backup file extension template: %w", tErr)
}
s.file = bf.String()
if s.c.BackupFilenameExpand {
s.file = os.ExpandEnv(s.file)
s.c.BackupLatestSymlink = os.ExpandEnv(s.c.BackupLatestSymlink)
@ -454,7 +469,7 @@ func (s *script) createArchive() error {
return fmt.Errorf("createArchive: error walking filesystem tree: %w", err)
}
if err := createArchive(filesEligibleForBackup, backupSources, tarFile); err != nil {
if err := createArchive(filesEligibleForBackup, backupSources, tarFile, s.c.BackupCompression.String()); err != nil {
return fmt.Errorf("createArchive: error compressing backup folder: %w", err)
}

2
go.mod
View File

@ -10,6 +10,7 @@ require (
github.com/docker/docker v24.0.5+incompatible
github.com/gofrs/flock v0.8.1
github.com/kelseyhightower/envconfig v1.4.0
github.com/klauspost/compress v1.16.7
github.com/leekchan/timeutil v0.0.0-20150802142658-28917288c48d
github.com/minio/minio-go/v7 v7.0.61
github.com/otiai10/copy v1.11.0
@ -33,7 +34,6 @@ require (
github.com/golang-jwt/jwt/v4 v4.5.0 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/compress v1.16.7 // indirect
github.com/klauspost/cpuid/v2 v2.2.5 // indirect
github.com/kr/fs v0.1.0 // indirect
github.com/kylelemons/godebug v1.1.0 // indirect

66
test/cli-zstd/run.sh Executable file
View File

@ -0,0 +1,66 @@
#!/bin/sh
set -e
cd $(dirname $0)
. ../util.sh
current_test=$(basename $(pwd))
docker network create test_network
docker volume create backup_data
docker volume create app_data
# This volume is created to test whether empty directories are handled
# correctly. It is not supposed to hold any data.
docker volume create empty_data
docker run -d \
--name minio \
--network test_network \
--env MINIO_ROOT_USER=test \
--env MINIO_ROOT_PASSWORD=test \
--env MINIO_ACCESS_KEY=test \
--env MINIO_SECRET_KEY=GMusLtUmILge2by+z890kQ \
-v backup_data:/data \
minio/minio:RELEASE.2020-08-04T23-10-51Z server /data
docker exec minio mkdir -p /data/backup
docker run -d \
--name offen \
--network test_network \
-v app_data:/var/opt/offen/ \
offen/offen:latest
sleep 10
docker run --rm \
--network test_network \
-v app_data:/backup/app_data \
-v empty_data:/backup/empty_data \
-v /var/run/docker.sock:/var/run/docker.sock \
--env AWS_ACCESS_KEY_ID=test \
--env AWS_SECRET_ACCESS_KEY=GMusLtUmILge2by+z890kQ \
--env AWS_ENDPOINT=minio:9000 \
--env AWS_ENDPOINT_PROTO=http \
--env AWS_S3_BUCKET_NAME=backup \
--env BACKUP_COMPRESSION=zst \
--env BACKUP_FILENAME='test.{{ .Extension }}' \
--env "BACKUP_FROM_SNAPSHOT=true" \
--entrypoint backup \
offen/docker-volume-backup:${TEST_VERSION:-canary}
# Have to install tar and zstd on Alpine because the plain image comes with very
# basic tar from busybox and it does not seem to support zstd
docker run --rm \
-v backup_data:/data alpine \
ash -c 'apk add --no-cache zstd tar && tar -xvf /data/backup/test.tar.zst --zstd && test -f /backup/app_data/offen.db && test -d /backup/empty_data'
pass "Found relevant files in untared remote backup."
# This test does not stop containers during backup. This is happening on
# purpose in order to cover this setup as well.
expect_running_containers "2"
docker rm $(docker stop minio offen)
docker volume rm backup_data app_data
docker network rm test_network