From da8c63f7552113464993270e34af36c32a90d51d Mon Sep 17 00:00:00 2001 From: Frederik Ring Date: Fri, 25 Mar 2022 18:26:34 +0100 Subject: [PATCH] Support identical cron schedule (#87) * Retry on lock being unavailable * Refactor locking to return plain error * Collect LockedTime in stats * Add test case * Add documentation for LOCK_TIMEOUT * Log in case lock needs to be awaited * Release resources created for awaiting lock --- README.md | 13 +++++- cmd/backup/config.go | 1 + cmd/backup/lock.go | 58 +++++++++++++++++++++++++ cmd/backup/main.go | 7 +-- cmd/backup/script.go | 2 + cmd/backup/stats.go | 1 + cmd/backup/util.go | 17 -------- docs/NOTIFICATION-TEMPLATES.md | 1 + test/confd/{backup.env => 01backup.env} | 0 test/confd/02backup.env | 2 + test/confd/{never.env => 03never.env} | 0 test/confd/docker-compose.yml | 5 ++- test/confd/run.sh | 6 +++ 13 files changed, 90 insertions(+), 23 deletions(-) create mode 100644 cmd/backup/lock.go rename test/confd/{backup.env => 01backup.env} (100%) create mode 100644 test/confd/02backup.env rename test/confd/{never.env => 03never.env} (100%) diff --git a/README.md b/README.md index 6600b71..4c28f74 100644 --- a/README.md +++ b/README.md @@ -335,6 +335,16 @@ You can populate below template according to your requirements and use it as you # DOCKER_HOST="tcp://docker_socket_proxy:2375" +########### LOCK_TIMEOUT + +# In the case of overlapping cron schedules run by the same container, +# subsequent invocations will wait for previous runs to finish before starting. +# By default, this will time out and fail in case the lock could not be acquired +# after 60 minutes. In case you need to adjust this timeout, supply a duration +# value as per https://pkg.go.dev/time#ParseDuration to `LOCK_TIMEOUT` + +# LOCK_TIMEOUT="60m" + ########### EMAIL NOTIFICATIONS # ************************************************************************ @@ -681,7 +691,8 @@ volumes: A separate cronjob will be created for each config file. If a configuration value is set both in the global environment as well as in the config file, the config file will take precedence. -The `backup` command expects to run on an exclusive lock, so it is your responsibility to make sure the invocations do not overlap. +The `backup` command expects to run on an exclusive lock, so in case you provide the same or overlapping schedules in your cron expressions, the runs will still be executed serially, one after the other. +The exact order of schedules that use the same cron expression is not specified. In case you need your schedules to overlap, you need to create a dedicated container for each schedule instead. When changing the configuration, you currently need to manually restart the container for the changes to take effect. diff --git a/cmd/backup/config.go b/cmd/backup/config.go index c1f481f..06c8c3a 100644 --- a/cmd/backup/config.go +++ b/cmd/backup/config.go @@ -41,4 +41,5 @@ type Config struct { WebdavPassword string `split_words:"true"` ExecLabel string `split_words:"true"` ExecForwardOutput bool `split_words:"true"` + LockTimeout time.Duration `split_words:"true" default:"60m"` } diff --git a/cmd/backup/lock.go b/cmd/backup/lock.go new file mode 100644 index 0000000..2bb5a79 --- /dev/null +++ b/cmd/backup/lock.go @@ -0,0 +1,58 @@ +// Copyright 2022 - Offen Authors +// SPDX-License-Identifier: MPL-2.0 + +package main + +import ( + "errors" + "fmt" + "time" + + "github.com/gofrs/flock" +) + +// lock opens a lockfile at the given location, keeping it locked until the +// caller invokes the returned release func. In case the lock is currently blocked +// by another execution, it will repeatedly retry until the lock is available +// or the given timeout is exceeded. +func (s *script) lock(lockfile string) (func() error, error) { + start := time.Now() + defer func() { + s.stats.LockedTime = time.Now().Sub(start) + }() + + retry := time.NewTicker(5 * time.Second) + defer retry.Stop() + deadline := time.NewTimer(s.c.LockTimeout) + defer deadline.Stop() + + fileLock := flock.New(lockfile) + + for { + acquired, err := fileLock.TryLock() + if err != nil { + return noop, fmt.Errorf("lock: error trying lock: %w", err) + } + if acquired { + if s.encounteredLock { + s.logger.Info("Acquired exclusive lock on subsequent attempt, ready to continue.") + } + return fileLock.Unlock, nil + } + + if !s.encounteredLock { + s.logger.Infof( + "Exclusive lock was not available on first attempt. Will retry until it becomes available or the timeout of %s is exceeded.", + s.c.LockTimeout, + ) + s.encounteredLock = true + } + + select { + case <-retry.C: + continue + case <-deadline.C: + return noop, errors.New("lock: timed out waiting for lockfile to become available") + } + } +} diff --git a/cmd/backup/main.go b/cmd/backup/main.go index 352cd4f..9da3410 100644 --- a/cmd/backup/main.go +++ b/cmd/backup/main.go @@ -8,14 +8,15 @@ import ( ) func main() { - unlock := lock("/var/lock/dockervolumebackup.lock") - defer unlock() - s, err := newScript() if err != nil { panic(err) } + unlock, err := s.lock("/var/lock/dockervolumebackup.lock") + defer unlock() + s.must(err) + defer func() { if pArg := recover(); pArg != nil { if err, ok := pArg.(error); ok { diff --git a/cmd/backup/script.go b/cmd/backup/script.go index 51dc0cb..c4ac244 100644 --- a/cmd/backup/script.go +++ b/cmd/backup/script.go @@ -47,6 +47,8 @@ type script struct { file string stats *Stats + encounteredLock bool + c *Config } diff --git a/cmd/backup/stats.go b/cmd/backup/stats.go index a6c9423..766a130 100644 --- a/cmd/backup/stats.go +++ b/cmd/backup/stats.go @@ -42,6 +42,7 @@ type Stats struct { StartTime time.Time EndTime time.Time TookTime time.Duration + LockedTime time.Duration LogOutput *bytes.Buffer Containers ContainersStats BackupFile BackupFileStats diff --git a/cmd/backup/util.go b/cmd/backup/util.go index 79121af..fd80da6 100644 --- a/cmd/backup/util.go +++ b/cmd/backup/util.go @@ -10,27 +10,10 @@ import ( "io" "os" "strings" - - "github.com/gofrs/flock" ) var noop = func() error { return nil } -// lock opens a lockfile at the given location, keeping it locked until the -// caller invokes the returned release func. When invoked while the file is -// still locked the function panics. -func lock(lockfile string) func() error { - fileLock := flock.New(lockfile) - acquired, err := fileLock.TryLock() - if err != nil { - panic(err) - } - if !acquired { - panic("unable to acquire file lock") - } - return fileLock.Unlock -} - // copy creates a copy of the file located at `dst` at `src`. func copyFile(src, dst string) error { in, err := os.Open(src) diff --git a/docs/NOTIFICATION-TEMPLATES.md b/docs/NOTIFICATION-TEMPLATES.md index a817d56..ed7957e 100644 --- a/docs/NOTIFICATION-TEMPLATES.md +++ b/docs/NOTIFICATION-TEMPLATES.md @@ -13,6 +13,7 @@ Here is a list of all data passed to the template: * `StartTime`: time when the script started execution * `EndTime`: time when the backup has completed successfully (after pruning) * `TookTime`: amount of time it took for the backup to run. (equal to `EndTime - StartTime`) + * `LockedTime`: amount of time it took for the backup to acquire the exclusive lock * `LogOutput`: full log of the application * `Containers`: object containing stats about the docker containers * `All`: total number of containers diff --git a/test/confd/backup.env b/test/confd/01backup.env similarity index 100% rename from test/confd/backup.env rename to test/confd/01backup.env diff --git a/test/confd/02backup.env b/test/confd/02backup.env new file mode 100644 index 0000000..7f33b66 --- /dev/null +++ b/test/confd/02backup.env @@ -0,0 +1,2 @@ +BACKUP_FILENAME="other.tar.gz" +BACKUP_CRON_EXPRESSION="*/1 * * * *" diff --git a/test/confd/never.env b/test/confd/03never.env similarity index 100% rename from test/confd/never.env rename to test/confd/03never.env diff --git a/test/confd/docker-compose.yml b/test/confd/docker-compose.yml index b74bb29..2fb423d 100644 --- a/test/confd/docker-compose.yml +++ b/test/confd/docker-compose.yml @@ -7,8 +7,9 @@ services: volumes: - ./local:/archive - app_data:/backup/app_data:ro - - ./backup.env:/etc/dockervolumebackup/conf.d/00backup.env - - ./never.env:/etc/dockervolumebackup/conf.d/10never.env + - ./01backup.env:/etc/dockervolumebackup/conf.d/01backup.env + - ./02backup.env:/etc/dockervolumebackup/conf.d/02backup.env + - ./03never.env:/etc/dockervolumebackup/conf.d/03never.env - /var/run/docker.sock:/var/run/docker.sock offen: diff --git a/test/confd/run.sh b/test/confd/run.sh index 7ec9076..15ab2ff 100755 --- a/test/confd/run.sh +++ b/test/confd/run.sh @@ -19,6 +19,12 @@ if [ ! -f ./local/conf.tar.gz ]; then fi echo "[TEST:PASS] Config from file was used." +if [ ! -f ./local/other.tar.gz ]; then + echo "[TEST:FAIL] Run on same schedule did not succeed." + exit 1 +fi +echo "[TEST:PASS] Run on same schedule succeeded." + if [ -f ./local/never.tar.gz ]; then echo "[TEST:FAIL] Unexpected file was found." exit 1