From b52b271bacc68404230e1bdeb2c8713f1a530f85 Mon Sep 17 00:00:00 2001 From: Frederik Ring Date: Sun, 8 May 2022 11:20:38 +0200 Subject: [PATCH] Allow for the exclusion of files from backups (#100) * Hoist walking of files so it can be used for features other than archive creation * Add option to ignore files from backup using glob patterns * Use Regexp instead of glob for exclusion * Ignore artifacts * Add teardown to test * Allow single Re for filtering only * Add documentation * Use MatchString on re, add bad input to message in case of error --- README.md | 6 ++++++ cmd/backup/archive.go | 15 +++------------ cmd/backup/config.go | 23 ++++++++++++++++++++++- cmd/backup/script.go | 23 ++++++++++++++++++++++- test/ignore/.gitignore | 1 + test/ignore/docker-compose.yml | 15 +++++++++++++++ test/ignore/run.sh | 27 +++++++++++++++++++++++++++ test/ignore/sources/me.txt | 0 test/ignore/sources/skip.me | 0 9 files changed, 96 insertions(+), 14 deletions(-) create mode 100644 test/ignore/.gitignore create mode 100644 test/ignore/docker-compose.yml create mode 100644 test/ignore/run.sh create mode 100644 test/ignore/sources/me.txt create mode 100644 test/ignore/sources/skip.me diff --git a/README.md b/README.md index c68e53d..8c5a6cd 100644 --- a/README.md +++ b/README.md @@ -168,6 +168,12 @@ You can populate below template according to your requirements and use it as you # BACKUP_SOURCES="/other/location" +# When given, all files in BACKUP_SOURCES whose full path matches the given +# regular expression will be excluded from the archive. Regular expressions +# use the syntax supported by Go's standard library: https://pkg.go.dev/regexp + +# BACKUP_EXCLUDE_REGEXP="\.log$" + ########### BACKUP STORAGE # The name of the remote bucket that should be used for storing backups. 
If diff --git a/cmd/backup/archive.go b/cmd/backup/archive.go index d419eac..46a9356 100644 --- a/cmd/backup/archive.go +++ b/cmd/backup/archive.go @@ -11,14 +11,13 @@ import ( "compress/gzip" "fmt" "io" - "io/fs" "os" "path" "path/filepath" "strings" ) -func createArchive(inputFilePath, outputFilePath string) error { +func createArchive(files []string, inputFilePath, outputFilePath string) error { inputFilePath = stripTrailingSlashes(inputFilePath) inputFilePath, outputFilePath, err := makeAbsolute(inputFilePath, outputFilePath) if err != nil { @@ -28,7 +27,7 @@ func createArchive(inputFilePath, outputFilePath string) error { return fmt.Errorf("createArchive: error creating output file path: %w", err) } - if err := compress(inputFilePath, outputFilePath, filepath.Dir(inputFilePath)); err != nil { + if err := compress(files, outputFilePath, filepath.Dir(inputFilePath)); err != nil { return fmt.Errorf("createArchive: error creating archive: %w", err) } @@ -52,7 +51,7 @@ func makeAbsolute(inputFilePath, outputFilePath string) (string, string, error) return inputFilePath, outputFilePath, err } -func compress(inPath, outFilePath, subPath string) error { +func compress(paths []string, outFilePath, subPath string) error { file, err := os.Create(outFilePath) if err != nil { return fmt.Errorf("compress: error creating out file: %w", err) @@ -62,14 +61,6 @@ func compress(inPath, outFilePath, subPath string) error { gzipWriter := gzip.NewWriter(file) tarWriter := tar.NewWriter(gzipWriter) - var paths []string - if err := filepath.WalkDir(inPath, func(path string, di fs.DirEntry, err error) error { - paths = append(paths, path) - return err - }); err != nil { - return fmt.Errorf("compress: error walking filesystem tree: %w", err) - } - for _, p := range paths { if err := writeTarGz(p, tarWriter, prefix); err != nil { return fmt.Errorf("compress error writing %s to archive: %w", p, err) diff --git a/cmd/backup/config.go b/cmd/backup/config.go index 4651d3a..7765446 100644 --- 
a/cmd/backup/config.go +++ b/cmd/backup/config.go @@ -3,7 +3,11 @@ package main -import "time" +import ( + "fmt" + "regexp" + "time" +) // Config holds all configuration values that are expected to be set // by users. @@ -18,6 +22,7 @@ type Config struct { BackupPruningPrefix string `split_words:"true"` BackupStopContainerLabel string `split_words:"true" default:"true"` BackupFromSnapshot bool `split_words:"true"` + BackupExcludeRegexp RegexpDecoder `split_words:"true"` AwsS3BucketName string `split_words:"true"` AwsS3Path string `split_words:"true"` AwsEndpoint string `split_words:"true" default:"s3.amazonaws.com"` @@ -44,3 +49,19 @@ type Config struct { ExecForwardOutput bool `split_words:"true"` LockTimeout time.Duration `split_words:"true" default:"60m"` } + +type RegexpDecoder struct { + Re *regexp.Regexp +} + +func (r *RegexpDecoder) Decode(v string) error { + if v == "" { + return nil + } + re, err := regexp.Compile(v) + if err != nil { + return fmt.Errorf("config: error compiling given regexp `%s`: %w", v, err) + } + *r = RegexpDecoder{Re: re} + return nil +} diff --git a/cmd/backup/script.go b/cmd/backup/script.go index 26c67a3..26f88e9 100644 --- a/cmd/backup/script.go +++ b/cmd/backup/script.go @@ -398,7 +398,28 @@ func (s *script) takeBackup() error { s.logger.Infof("Removed tar file `%s`.", tarFile) return nil }) - if err := createArchive(backupSources, tarFile); err != nil { + + backupPath, err := filepath.Abs(stripTrailingSlashes(backupSources)) + if err != nil { + return fmt.Errorf("takeBackup: error getting absolute path: %w", err) + } + + var filesEligibleForBackup []string + if err := filepath.WalkDir(backupPath, func(path string, di fs.DirEntry, err error) error { + if err != nil { + return err + } + + if s.c.BackupExcludeRegexp.Re != nil && s.c.BackupExcludeRegexp.Re.MatchString(path) { + return nil + } + filesEligibleForBackup = append(filesEligibleForBackup, path) + return nil + }); err != nil { + return fmt.Errorf("compress: error walking 
filesystem tree: %w", err) + } + + if err := createArchive(filesEligibleForBackup, backupSources, tarFile); err != nil { return fmt.Errorf("takeBackup: error compressing backup folder: %w", err) } diff --git a/test/ignore/.gitignore b/test/ignore/.gitignore new file mode 100644 index 0000000..4083037 --- /dev/null +++ b/test/ignore/.gitignore @@ -0,0 +1 @@ +local diff --git a/test/ignore/docker-compose.yml b/test/ignore/docker-compose.yml new file mode 100644 index 0000000..3b02ae0 --- /dev/null +++ b/test/ignore/docker-compose.yml @@ -0,0 +1,15 @@ +version: '3.8' + +services: + backup: + image: offen/docker-volume-backup:${TEST_VERSION:-canary} + deploy: + restart_policy: + condition: on-failure + environment: + BACKUP_FILENAME: test.tar.gz + BACKUP_CRON_EXPRESSION: 0 0 5 31 2 ? + BACKUP_EXCLUDE_REGEXP: '\.(me|you)$$' + volumes: + - ./local:/archive + - ./sources:/backup/data:ro diff --git a/test/ignore/run.sh b/test/ignore/run.sh new file mode 100644 index 0000000..eee6ad5 --- /dev/null +++ b/test/ignore/run.sh @@ -0,0 +1,27 @@ +#!/bin/sh + +set -e + +cd $(dirname $0) +mkdir -p local + +docker-compose up -d +sleep 5 +docker-compose exec backup backup + +docker-compose down --volumes + +out=$(mktemp -d) +sudo tar --same-owner -xvf ./local/test.tar.gz -C "$out" + +if [ ! -f "$out/backup/data/me.txt" ]; then + echo "[TEST:FAIL] Expected file was not found." + exit 1 +fi +echo "[TEST:PASS] Expected file was found." + +if [ -f "$out/backup/data/skip.me" ]; then + echo "[TEST:FAIL] Ignored file was found." + exit 1 +fi +echo "[TEST:PASS] Ignored file was not found." diff --git a/test/ignore/sources/me.txt b/test/ignore/sources/me.txt new file mode 100644 index 0000000..e69de29 diff --git a/test/ignore/sources/skip.me b/test/ignore/sources/skip.me new file mode 100644 index 0000000..e69de29