diff --git a/cmd/backup/config.go b/cmd/backup/config.go index 0a5122e..3afbe25 100644 --- a/cmd/backup/config.go +++ b/cmd/backup/config.go @@ -38,6 +38,7 @@ type Config struct { BackupPruningLeeway time.Duration `split_words:"true" default:"1m"` BackupPruningPrefix string `split_words:"true"` BackupStopContainerLabel string `split_words:"true" default:"true"` + BackupStopServiceTimeout time.Duration `split_words:"true" default:"5m"` BackupFromSnapshot bool `split_words:"true"` BackupExcludeRegexp RegexpDecoder `split_words:"true"` BackupSkipBackendsFromPrune []string `split_words:"true"` diff --git a/cmd/backup/docker.go b/cmd/backup/docker.go index 340e14f..ca3265b 100644 --- a/cmd/backup/docker.go +++ b/cmd/backup/docker.go @@ -41,9 +41,9 @@ func scaleService(cli *client.Client, serviceID string, replicas uint64) ([]stri return response.Warnings, nil } -func awaitContainerCountForService(cli *client.Client, serviceID string, count int) error { +func awaitContainerCountForService(cli *client.Client, serviceID string, count int, timeoutAfter time.Duration) error { poll := time.NewTicker(time.Second) - timeout := time.NewTimer(5 * time.Minute) + timeout := time.NewTimer(timeoutAfter) defer timeout.Stop() defer poll.Stop() @@ -51,7 +51,8 @@ func awaitContainerCountForService(cli *client.Client, serviceID string, count i select { case <-timeout.C: return fmt.Errorf( - "awaitContainerCount: timed out after waiting 5 minutes for service %s to reach desired container count of %d", + "awaitContainerCount: timed out after waiting %s for service %s to reach desired container count of %d", + timeoutAfter, serviceID, count, ) @@ -196,7 +197,7 @@ func (s *script) stopContainersAndServices() (func() error, error) { } // progress.ServiceProgress returns too early, so we need to manually check // whether all containers belonging to the service have actually been removed - if err := awaitContainerCountForService(s.cli, svc.serviceID, 0); err != nil { + if err := awaitContainerCountForService(s.cli, svc.serviceID, 0, s.c.BackupStopServiceTimeout); err != nil { scaleDownErrors.append(err) } }(svc) diff --git a/docs/reference/index.md b/docs/reference/index.md index cdbe3dc..9eb157d 100644 --- a/docs/reference/index.md +++ b/docs/reference/index.md @@ -326,6 +326,14 @@ You can populate below template according to your requirements and use it as you # BACKUP_STOP_CONTAINER_LABEL="service1" +# When trying to scale down Docker Swarm services, give up after +# the specified amount of time in case the service has not converged yet. +# In case you need to adjust this timeout, supply a duration +# value as per https://pkg.go.dev/time#ParseDuration to `BACKUP_STOP_SERVICE_TIMEOUT`. +# Defaults to 5 minutes. + +# BACKUP_STOP_SERVICE_TIMEOUT="5m" + ########### EXECUTING COMMANDS IN CONTAINERS PRE/POST BACKUP # It is possible to define commands to be run in any container before and after