Replace Gzip with PGzip (#266)

* Replace Gzip with optimized PGzip. Add concurrency option.

* Add shortened timeout for 'dc down' too.

* Add NaturalNumberZero to allow zero.

* Add test for concurrency=0

* Rename to GZIP_PARALLELISM

* Fix block size. Fix compression level. Fix CI.

* Refactor compression writer fetching. Renamed WholeNumber
This commit is contained in:
MaxJa4 2023-09-03 16:49:52 +02:00 committed by GitHub
parent 1e39ac41f4
commit 336c5bed71
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 114 additions and 17 deletions

View File

@ -159,6 +159,13 @@ You can populate below template according to your requirements and use it as you
# BACKUP_COMPRESSION="gz"
# Parallelism level for "gz" (Gzip) compression.
# Defines how many blocks of data are concurrently processed.
# Higher values result in faster compression. No effect on decompression
# Default = 1. Setting this to 0 will use all available threads.
# GZIP_PARALLELISM=1
# The name of the backup file including the extension.
# Format verbs will be replaced as in `strftime`. Omitting them
# will result in the same filename for every backup run, which means previous

View File

@ -8,18 +8,20 @@ package main
import (
"archive/tar"
"compress/gzip"
"fmt"
"io"
"os"
"path"
"path/filepath"
"runtime"
"strings"
"github.com/klauspost/pgzip"
"github.com/klauspost/compress/zstd"
)
func createArchive(files []string, inputFilePath, outputFilePath string, compression string) error {
func createArchive(files []string, inputFilePath, outputFilePath string, compression string, compressionConcurrency int) error {
inputFilePath = stripTrailingSlashes(inputFilePath)
inputFilePath, outputFilePath, err := makeAbsolute(inputFilePath, outputFilePath)
if err != nil {
@ -29,7 +31,7 @@ func createArchive(files []string, inputFilePath, outputFilePath string, compres
return fmt.Errorf("createArchive: error creating output file path: %w", err)
}
if err := compress(files, outputFilePath, filepath.Dir(inputFilePath), compression); err != nil {
if err := compress(files, outputFilePath, filepath.Dir(inputFilePath), compression, compressionConcurrency); err != nil {
return fmt.Errorf("createArchive: error creating archive: %w", err)
}
@ -53,26 +55,17 @@ func makeAbsolute(inputFilePath, outputFilePath string) (string, string, error)
return inputFilePath, outputFilePath, err
}
func compress(paths []string, outFilePath, subPath string, algo string) error {
func compress(paths []string, outFilePath, subPath string, algo string, concurrency int) error {
file, err := os.Create(outFilePath)
var compressWriter io.WriteCloser
if err != nil {
return fmt.Errorf("compress: error creating out file: %w", err)
}
prefix := path.Dir(outFilePath)
switch algo {
case "gz":
compressWriter = gzip.NewWriter(file)
case "zst":
compressWriter, err = zstd.NewWriter(file)
compressWriter, err := getCompressionWriter(file, algo, concurrency)
if err != nil {
return fmt.Errorf("compress: zstd error: %w", err)
return fmt.Errorf("compress: error getting compression writer: %w", err)
}
default:
return fmt.Errorf("compress: unsupported compression algorithm: %s", algo)
}
tarWriter := tar.NewWriter(compressWriter)
for _, p := range paths {
@ -99,6 +92,34 @@ func compress(paths []string, outFilePath, subPath string, algo string) error {
return nil
}
func getCompressionWriter(file *os.File, algo string, concurrency int) (io.WriteCloser, error) {
switch algo {
case "gz":
w, err := pgzip.NewWriterLevel(file, 5)
if err != nil {
return nil, fmt.Errorf("getCompressionWriter: gzip error: %w", err)
}
if concurrency == 0 {
concurrency = runtime.GOMAXPROCS(0)
}
if err := w.SetConcurrency(1<<20, concurrency); err != nil {
return nil, fmt.Errorf("getCompressionWriter: error setting concurrency: %w", err)
}
return w, nil
case "zst":
compressWriter, err := zstd.NewWriter(file)
if err != nil {
return nil, fmt.Errorf("getCompressionWriter: zstd error: %w", err)
}
return compressWriter, nil
default:
return nil, fmt.Errorf("getCompressionWriter: unsupported compression algorithm: %s", algo)
}
}
func writeTarball(path string, tarWriter *tar.Writer, prefix string) error {
fileInfo, err := os.Lstat(path)
if err != nil {

View File

@ -28,6 +28,7 @@ type Config struct {
AwsIamRoleEndpoint string `split_words:"true"`
AwsPartSize int64 `split_words:"true"`
BackupCompression CompressionType `split_words:"true" default:"gz"`
GzipParallelism WholeNumber `split_words:"true" default:"1"`
BackupSources string `split_words:"true" default:"/backup"`
BackupFilename string `split_words:"true" default:"backup-%Y-%m-%dT%H-%M-%S.{{ .Extension }}"`
BackupFilenameExpand bool `split_words:"true"`
@ -131,6 +132,7 @@ func (r *RegexpDecoder) Decode(v string) error {
return nil
}
// NaturalNumber is a type that can be used to decode a positive, non-zero natural number
type NaturalNumber int
func (n *NaturalNumber) Decode(v string) error {
@ -148,3 +150,22 @@ func (n *NaturalNumber) Decode(v string) error {
func (n *NaturalNumber) Int() int {
return int(*n)
}
// WholeNumber is a type that can be used to decode a positive whole number, including zero
type WholeNumber int
func (n *WholeNumber) Decode(v string) error {
asInt, err := strconv.Atoi(v)
if err != nil {
return fmt.Errorf("config: error converting %s to int", v)
}
if asInt < 0 {
return fmt.Errorf("config: expected a whole, positive number, including zero. Got %d", asInt)
}
*n = WholeNumber(asInt)
return nil
}
func (n *WholeNumber) Int() int {
return int(*n)
}

View File

@ -503,7 +503,7 @@ func (s *script) createArchive() error {
return fmt.Errorf("createArchive: error walking filesystem tree: %w", err)
}
if err := createArchive(filesEligibleForBackup, backupSources, tarFile, s.c.BackupCompression.String()); err != nil {
if err := createArchive(filesEligibleForBackup, backupSources, tarFile, s.c.BackupCompression.String(), s.c.GzipParallelism.Int()); err != nil {
return fmt.Errorf("createArchive: error compressing backup folder: %w", err)
}

1
go.mod
View File

@ -45,6 +45,7 @@ require (
github.com/google/uuid v1.3.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/cpuid/v2 v2.2.5 // indirect
github.com/klauspost/pgzip v1.2.6
github.com/kr/fs v0.1.0 // indirect
github.com/kylelemons/godebug v1.1.0 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect

2
go.sum
View File

@ -458,6 +458,8 @@ github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQs
github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg=
github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU=
github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/fs v0.1.0 h1:Jskdu9ieNAYnjxsi0LbQp1ulIKZV1LAFgK1tWhpZgl8=

42
test/pgzip/run.sh Executable file
View File

@ -0,0 +1,42 @@
#!/bin/sh
set -e
cd $(dirname $0)
. ../util.sh
current_test=$(basename $(pwd))
docker network create test_network
docker volume create app_data
LOCAL_DIR=$(mktemp -d)
docker run -d -q \
--name offen \
--network test_network \
-v app_data:/var/opt/offen/ \
offen/offen:latest
sleep 5
docker run --rm -q \
--network test_network \
-v app_data:/backup/app_data \
-v $LOCAL_DIR:/archive \
-v /var/run/docker.sock:/var/run/docker.sock \
--env BACKUP_COMPRESSION=gz \
--env GZIP_PARALLELISM=0 \
--env BACKUP_FILENAME='test.{{ .Extension }}' \
--entrypoint backup \
offen/docker-volume-backup:${TEST_VERSION:-canary}
tmp_dir=$(mktemp -d)
tar -xvf "$LOCAL_DIR/test.tar.gz" -C $tmp_dir
if [ ! -f "$tmp_dir/backup/app_data/offen.db" ]; then
fail "Could not find expected file in untared archive."
fi
pass "Found relevant files in untared local backup."
# This test does not stop containers during backup. This is happening on
# purpose in order to cover this setup as well.
expect_running_containers "1"

View File

@ -35,6 +35,9 @@ docker() {
up)
shift
command docker compose up --timeout 3 "$@";;
down)
shift
command docker compose down --timeout 3 "$@";;
*)
command docker compose "$@";;
esac