From 336c5bed71c1505e16ac05ac89134ace7354ed21 Mon Sep 17 00:00:00 2001 From: MaxJa4 <74194322+MaxJa4@users.noreply.github.com> Date: Sun, 3 Sep 2023 16:49:52 +0200 Subject: [PATCH] Replace Gzip with PGzip (#266) * Replace Gzip with optimized PGzip. Add concurrency option. * Add shortened timeout for 'dc down' too. * Add NaturalNumberZero to allow zero. * Add test for concurrency=0 * Rename to GZIP_PARALLELISM * Fix block size. Fix compression level. Fix CI. * Refactor compression writer fetching. Renamed WholeNumber --- README.md | 7 ++++++ cmd/backup/archive.go | 53 ++++++++++++++++++++++++++++++------------- cmd/backup/config.go | 21 +++++++++++++++++ cmd/backup/script.go | 2 +- go.mod | 1 + go.sum | 2 ++ test/pgzip/run.sh | 42 ++++++++++++++++++++++++++++++++++ test/util.sh | 3 +++ 8 files changed, 114 insertions(+), 17 deletions(-) create mode 100755 test/pgzip/run.sh diff --git a/README.md b/README.md index 2c9e1b3..46642d4 100644 --- a/README.md +++ b/README.md @@ -159,6 +159,13 @@ You can populate below template according to your requirements and use it as you # BACKUP_COMPRESSION="gz" +# Parallelism level for "gz" (Gzip) compression. +# Defines how many blocks of data are concurrently processed. +# Higher values result in faster compression. No effect on decompression +# Default = 1. Setting this to 0 will use all available threads. + +# GZIP_PARALLELISM=1 + # The name of the backup file including the extension. # Format verbs will be replaced as in `strftime`. Omitting them # will result in the same filename for every backup run, which means previous diff --git a/cmd/backup/archive.go b/cmd/backup/archive.go index 8dd4c4a..32297e4 100644 --- a/cmd/backup/archive.go +++ b/cmd/backup/archive.go @@ -8,18 +8,20 @@ package main import ( "archive/tar" - "compress/gzip" "fmt" "io" "os" "path" "path/filepath" + "runtime" "strings" + "github.com/klauspost/pgzip" + "github.com/klauspost/compress/zstd" ) -func createArchive(files []string, inputFilePath, outputFilePath string, compression string) error { +func createArchive(files []string, inputFilePath, outputFilePath string, compression string, compressionConcurrency int) error { inputFilePath = stripTrailingSlashes(inputFilePath) inputFilePath, outputFilePath, err := makeAbsolute(inputFilePath, outputFilePath) if err != nil { @@ -29,7 +31,7 @@ func createArchive(files []string, inputFilePath, outputFilePath string, compres return fmt.Errorf("createArchive: error creating output file path: %w", err) } - if err := compress(files, outputFilePath, filepath.Dir(inputFilePath), compression); err != nil { + if err := compress(files, outputFilePath, filepath.Dir(inputFilePath), compression, compressionConcurrency); err != nil { return fmt.Errorf("createArchive: error creating archive: %w", err) } @@ -53,26 +55,17 @@ func makeAbsolute(inputFilePath, outputFilePath string) (string, string, error) return inputFilePath, outputFilePath, err } -func compress(paths []string, outFilePath, subPath string, algo string) error { +func compress(paths []string, outFilePath, subPath string, algo string, concurrency int) error { file, err := os.Create(outFilePath) - var compressWriter io.WriteCloser if err != nil { return fmt.Errorf("compress: error creating out file: %w", err) } prefix := path.Dir(outFilePath) - switch algo { - case "gz": - compressWriter = gzip.NewWriter(file) - case "zst": - compressWriter, err = zstd.NewWriter(file) - if err != nil { - return fmt.Errorf("compress: zstd error: %w", err) - } - default: - return fmt.Errorf("compress: unsupported compression algorithm: %s", algo) + compressWriter, err := getCompressionWriter(file, algo, concurrency) + if err != nil { + return fmt.Errorf("compress: error getting compression writer: %w", err) } - tarWriter := tar.NewWriter(compressWriter) for _, p := range paths { @@ -99,6 +92,34 @@ func compress(paths []string, outFilePath, subPath string, algo string) error { return nil } +func getCompressionWriter(file *os.File, algo string, concurrency int) (io.WriteCloser, error) { + switch algo { + case "gz": + w, err := pgzip.NewWriterLevel(file, 5) + if err != nil { + return nil, fmt.Errorf("getCompressionWriter: gzip error: %w", err) + } + + if concurrency == 0 { + concurrency = runtime.GOMAXPROCS(0) + } + + if err := w.SetConcurrency(1<<20, concurrency); err != nil { + return nil, fmt.Errorf("getCompressionWriter: error setting concurrency: %w", err) + } + + return w, nil + case "zst": + compressWriter, err := zstd.NewWriter(file) + if err != nil { + return nil, fmt.Errorf("getCompressionWriter: zstd error: %w", err) + } + return compressWriter, nil + default: + return nil, fmt.Errorf("getCompressionWriter: unsupported compression algorithm: %s", algo) + } +} + func writeTarball(path string, tarWriter *tar.Writer, prefix string) error { fileInfo, err := os.Lstat(path) if err != nil { diff --git a/cmd/backup/config.go b/cmd/backup/config.go index 82f4b0a..0a5122e 100644 --- a/cmd/backup/config.go +++ b/cmd/backup/config.go @@ -28,6 +28,7 @@ type Config struct { AwsIamRoleEndpoint string `split_words:"true"` AwsPartSize int64 `split_words:"true"` BackupCompression CompressionType `split_words:"true" default:"gz"` + GzipParallelism WholeNumber `split_words:"true" default:"1"` BackupSources string `split_words:"true" default:"/backup"` BackupFilename string `split_words:"true" default:"backup-%Y-%m-%dT%H-%M-%S.{{ .Extension }}"` BackupFilenameExpand bool `split_words:"true"` @@ -131,6 +132,7 @@ func (r *RegexpDecoder) Decode(v string) error { return nil } +// NaturalNumber is a type that can be used to decode a positive, non-zero natural number type NaturalNumber int func (n *NaturalNumber) Decode(v string) error { @@ -148,3 +150,22 @@ func (n *NaturalNumber) Decode(v string) error { func (n *NaturalNumber) Int() int { return int(*n) } + +// WholeNumber is a type that can be used to decode a positive whole number, including zero +type WholeNumber int + +func (n *WholeNumber) Decode(v string) error { + asInt, err := strconv.Atoi(v) + if err != nil { + return fmt.Errorf("config: error converting %s to int", v) + } + if asInt < 0 { + return fmt.Errorf("config: expected a whole, positive number, including zero. Got %d", asInt) + } + *n = WholeNumber(asInt) + return nil +} + +func (n *WholeNumber) Int() int { + return int(*n) +} diff --git a/cmd/backup/script.go b/cmd/backup/script.go index 0c12ad7..c370acb 100644 --- a/cmd/backup/script.go +++ b/cmd/backup/script.go @@ -503,7 +503,7 @@ func (s *script) createArchive() error { return fmt.Errorf("createArchive: error walking filesystem tree: %w", err) } - if err := createArchive(filesEligibleForBackup, backupSources, tarFile, s.c.BackupCompression.String()); err != nil { + if err := createArchive(filesEligibleForBackup, backupSources, tarFile, s.c.BackupCompression.String(), s.c.GzipParallelism.Int()); err != nil { return fmt.Errorf("createArchive: error compressing backup folder: %w", err) } diff --git a/go.mod b/go.mod index ff5a987..20f88e0 100644 --- a/go.mod +++ b/go.mod @@ -45,6 +45,7 @@ require ( github.com/google/uuid v1.3.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/cpuid/v2 v2.2.5 // indirect + github.com/klauspost/pgzip v1.2.6 github.com/kr/fs v0.1.0 // indirect github.com/kylelemons/godebug v1.1.0 // indirect github.com/mattn/go-colorable v0.1.13 // indirect diff --git a/go.sum b/go.sum index 8b3587b..7b4fd02 100644 --- a/go.sum +++ b/go.sum @@ -458,6 +458,8 @@ github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQs github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg= github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= +github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU= +github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/fs v0.1.0 h1:Jskdu9ieNAYnjxsi0LbQp1ulIKZV1LAFgK1tWhpZgl8= diff --git a/test/pgzip/run.sh b/test/pgzip/run.sh new file mode 100755 index 0000000..ff6075e --- /dev/null +++ b/test/pgzip/run.sh @@ -0,0 +1,42 @@ +#!/bin/sh + +set -e + +cd $(dirname $0) +. ../util.sh +current_test=$(basename $(pwd)) + +docker network create test_network +docker volume create app_data + +LOCAL_DIR=$(mktemp -d) + +docker run -d -q \ + --name offen \ + --network test_network \ + -v app_data:/var/opt/offen/ \ + offen/offen:latest + +sleep 5 + +docker run --rm -q \ + --network test_network \ + -v app_data:/backup/app_data \ + -v $LOCAL_DIR:/archive \ + -v /var/run/docker.sock:/var/run/docker.sock \ + --env BACKUP_COMPRESSION=gz \ + --env GZIP_PARALLELISM=0 \ + --env BACKUP_FILENAME='test.{{ .Extension }}' \ + --entrypoint backup \ + offen/docker-volume-backup:${TEST_VERSION:-canary} + +tmp_dir=$(mktemp -d) +tar -xvf "$LOCAL_DIR/test.tar.gz" -C $tmp_dir +if [ ! -f "$tmp_dir/backup/app_data/offen.db" ]; then + fail "Could not find expected file in untared archive." +fi +pass "Found relevant files in untared local backup." + +# This test does not stop containers during backup. This is happening on +# purpose in order to cover this setup as well. +expect_running_containers "1" diff --git a/test/util.sh b/test/util.sh index 6927ad3..6de8606 100644 --- a/test/util.sh +++ b/test/util.sh @@ -35,6 +35,9 @@ docker() { up) shift command docker compose up --timeout 3 "$@";; + down) + shift + command docker compose down --timeout 3 "$@";; *) command docker compose "$@";; esac