docker-volume-backup/cmd/backup/main.go

552 lines
15 KiB
Go
Raw Normal View History

2021-08-22 18:07:32 +02:00
// Copyright 2021 - Offen Authors <hioffen@posteo.de>
// SPDX-License-Identifier: MPL-2.0
package main
import (
"bytes"
"context"
"errors"
"fmt"
2021-08-21 21:26:27 +02:00
"io"
"os"
2021-08-21 21:26:27 +02:00
"path"
2021-08-22 14:00:21 +02:00
"path/filepath"
"strings"
"time"
"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/filters"
"github.com/docker/docker/api/types/swarm"
"github.com/docker/docker/client"
2021-08-23 18:46:49 +02:00
"github.com/gofrs/flock"
"github.com/kelseyhightower/envconfig"
2021-08-22 21:06:51 +02:00
"github.com/leekchan/timeutil"
2021-09-03 19:06:42 +02:00
"github.com/m90/targz"
2021-08-24 11:39:27 +02:00
"github.com/minio/minio-go/v7"
2021-08-21 21:26:27 +02:00
"github.com/minio/minio-go/v7/pkg/credentials"
2021-08-22 16:41:06 +02:00
"github.com/sirupsen/logrus"
2021-08-22 14:44:33 +02:00
"golang.org/x/crypto/openpgp"
)
func main() {
unlock := lock("/var/lock/dockervolumebackup.lock")
defer unlock()
2021-08-21 19:26:42 +02:00
2021-08-24 11:39:27 +02:00
s, err := newScript()
if err != nil {
panic(err)
}
s.must(func() error {
restartContainers, err := s.stopContainers()
2021-08-26 16:22:24 +02:00
defer func() {
s.must(restartContainers())
}()
2021-08-24 11:39:27 +02:00
if err != nil {
return err
}
return s.takeBackup()
}())
2021-08-22 19:37:48 +02:00
s.must(s.encryptBackup())
s.must(s.copyBackup())
2021-08-24 11:39:27 +02:00
s.must(s.removeArtifacts())
2021-08-22 19:37:48 +02:00
s.must(s.pruneOldBackups())
s.logger.Info("Finished running backup tasks.")
2021-08-21 19:26:42 +02:00
}
// script holds all the stateful information required to orchestrate a
// single backup run.
2021-08-21 19:26:42 +02:00
type script struct {
2021-08-22 22:02:19 +02:00
cli *client.Client
mc *minio.Client
logger *logrus.Logger
start time.Time
file string
output *bytes.Buffer
2021-08-22 22:02:19 +02:00
c *config
}
// config lists all values newScript populates through envconfig.
type config struct {
	// BackupSources is the location that takeBackup archives.
	BackupSources string `split_words:"true" default:"/backup"`
	// BackupFilename is the name template of the archive created below /tmp;
	// takeBackup expands it with timeutil.Strftime.
	BackupFilename string `split_words:"true" default:"backup-%Y-%m-%dT%H-%M-%S.tar.gz"`
	// BackupArchive is the local directory that receives a copy of the
	// backup, provided it exists.
	BackupArchive string `split_words:"true" default:"/archive"`
	// BackupRetentionDays is the age in days after which backups are pruned.
	// A negative value disables pruning.
	BackupRetentionDays int32 `split_words:"true" default:"-1"`
	// BackupPruningLeeway is how long pruneOldBackups sleeps before it starts
	// looking for candidates.
	BackupPruningLeeway time.Duration `split_words:"true" default:"1m"`
	// BackupPruningPrefix restricts pruning to backups whose name starts
	// with the given prefix.
	BackupPruningPrefix string `split_words:"true"`
	// BackupStopContainerLabel is the value the label
	// docker-volume-backup.stop-during-backup must have on a container for
	// it to be stopped during the backup.
	BackupStopContainerLabel string `split_words:"true" default:"true"`
	// AwsS3BucketName is the bucket backups are uploaded to. Leaving it
	// empty disables remote storage.
	AwsS3BucketName string `split_words:"true"`
	// AwsEndpoint is the endpoint handed to the minio client.
	AwsEndpoint string `split_words:"true" default:"s3.amazonaws.com"`
	// AwsEndpointProto must be "https" for the minio client to use a
	// secure connection.
	AwsEndpointProto string `split_words:"true" default:"https"`
	// AwsEndpointInsecure turns off the secure connection regardless of
	// AwsEndpointProto.
	AwsEndpointInsecure bool `split_words:"true"`
	// AwsAccessKeyID is the access key of the static storage credentials.
	AwsAccessKeyID string `envconfig:"AWS_ACCESS_KEY_ID"`
	// AwsSecretAccessKey is the secret key of the static storage credentials.
	AwsSecretAccessKey string `split_words:"true"`
	// GpgPassphrase is the passphrase used for encrypting the backup.
	// Leaving it empty disables encryption.
	GpgPassphrase string `split_words:"true"`
}
2021-08-24 11:39:27 +02:00
// newScript creates all resources needed for the script to perform actions against
// remote resources like the Docker engine or remote storage locations. All
// reading from env vars or other configuration sources is expected to happen
// in this method.
2021-08-24 11:39:27 +02:00
func newScript() (*script, error) {
stdOut, logBuffer := buffer(os.Stdout)
2021-08-24 11:39:27 +02:00
s := &script{
2021-08-29 18:26:40 +02:00
c: &config{},
2021-08-24 11:39:27 +02:00
logger: &logrus.Logger{
Out: stdOut,
2021-08-24 11:39:27 +02:00
Formatter: new(logrus.TextFormatter),
Hooks: make(logrus.LevelHooks),
Level: logrus.InfoLevel,
},
start: time.Now(),
output: logBuffer,
2021-08-24 11:39:27 +02:00
}
2021-08-21 19:26:42 +02:00
if err := envconfig.Process("", s.c); err != nil {
2021-08-24 11:39:27 +02:00
return nil, fmt.Errorf("newScript: failed to process configuration values: %w", err)
2021-08-21 19:26:42 +02:00
}
2021-08-24 11:39:27 +02:00
s.file = path.Join("/tmp", s.c.BackupFilename)
_, err := os.Stat("/var/run/docker.sock")
if !os.IsNotExist(err) {
2021-08-21 19:26:42 +02:00
cli, err := client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation())
if err != nil {
2021-08-24 11:39:27 +02:00
return nil, fmt.Errorf("newScript: failed to create docker client")
}
2021-08-21 19:26:42 +02:00
s.cli = cli
}
2021-08-21 21:26:27 +02:00
if s.c.AwsS3BucketName != "" {
mc, err := minio.New(s.c.AwsEndpoint, &minio.Options{
2021-08-21 21:26:27 +02:00
Creds: credentials.NewStaticV4(
s.c.AwsAccessKeyID,
s.c.AwsSecretAccessKey,
2021-08-21 21:26:27 +02:00
"",
),
Secure: !s.c.AwsEndpointInsecure && s.c.AwsEndpointProto == "https",
2021-08-21 21:26:27 +02:00
})
if err != nil {
2021-08-24 11:39:27 +02:00
return nil, fmt.Errorf("newScript: error setting up minio client: %w", err)
2021-08-21 21:26:27 +02:00
}
s.mc = mc
}
2021-08-22 16:41:06 +02:00
2021-08-24 11:39:27 +02:00
return s, nil
2021-08-21 19:26:42 +02:00
}
2021-08-24 11:39:27 +02:00
// noop is a function that does nothing and reports success. stopContainers
// hands it out as the restart function whenever there is nothing to restart,
// so callers can always invoke the returned value.
var noop = func() error {
	return nil
}
// stopContainers stops all Docker containers that are marked as to being
// stopped during the backup and returns a function that can be called to
// restart everything that has been stopped.
func (s *script) stopContainers() (func() error, error) {
2021-08-21 19:26:42 +02:00
if s.cli == nil {
2021-08-24 11:39:27 +02:00
return noop, nil
2021-08-21 19:26:42 +02:00
}
2021-08-29 18:26:40 +02:00
allContainers, err := s.cli.ContainerList(context.Background(), types.ContainerListOptions{
Quiet: true,
})
if err != nil {
2021-08-24 11:39:27 +02:00
return noop, fmt.Errorf("stopContainersAndRun: error querying for containers: %w", err)
}
2021-08-22 19:37:48 +02:00
containerLabel := fmt.Sprintf(
"docker-volume-backup.stop-during-backup=%s",
s.c.BackupStopContainerLabel,
2021-08-22 19:37:48 +02:00
)
2021-08-29 18:26:40 +02:00
containersToStop, err := s.cli.ContainerList(context.Background(), types.ContainerListOptions{
2021-08-21 19:26:42 +02:00
Quiet: true,
Filters: filters.NewArgs(filters.KeyValuePair{
2021-08-22 19:37:48 +02:00
Key: "label",
Value: containerLabel,
2021-08-21 19:26:42 +02:00
}),
})
if err != nil {
2021-08-24 11:39:27 +02:00
return noop, fmt.Errorf("stopContainersAndRun: error querying for containers to stop: %w", err)
}
if len(containersToStop) == 0 {
2021-08-24 11:39:27 +02:00
return noop, nil
}
2021-08-22 19:37:48 +02:00
s.logger.Infof(
"Stopping %d container(s) labeled `%s` out of %d running container(s).",
2021-08-22 19:37:48 +02:00
len(containersToStop),
containerLabel,
len(allContainers),
)
var stoppedContainers []types.Container
var stopErrors []error
for _, container := range containersToStop {
2021-08-29 18:26:40 +02:00
if err := s.cli.ContainerStop(context.Background(), container.ID, nil); err != nil {
stopErrors = append(stopErrors, err)
} else {
stoppedContainers = append(stoppedContainers, container)
}
}
var stopError error
2021-08-24 11:39:27 +02:00
if len(stopErrors) != 0 {
stopError = fmt.Errorf(
2021-08-24 11:39:27 +02:00
"stopContainersAndRun: %d error(s) stopping containers: %w",
len(stopErrors),
join(stopErrors...),
2021-08-24 11:39:27 +02:00
)
}
return func() error {
servicesRequiringUpdate := map[string]struct{}{}
var restartErrors []error
for _, container := range stoppedContainers {
if swarmServiceName, ok := container.Labels["com.docker.swarm.service.name"]; ok {
servicesRequiringUpdate[swarmServiceName] = struct{}{}
continue
}
2021-08-29 18:26:40 +02:00
if err := s.cli.ContainerStart(context.Background(), container.ID, types.ContainerStartOptions{}); err != nil {
restartErrors = append(restartErrors, err)
}
}
if len(servicesRequiringUpdate) != 0 {
2021-08-29 18:26:40 +02:00
services, _ := s.cli.ServiceList(context.Background(), types.ServiceListOptions{})
for serviceName := range servicesRequiringUpdate {
var serviceMatch swarm.Service
for _, service := range services {
if service.Spec.Name == serviceName {
serviceMatch = service
break
}
}
if serviceMatch.ID == "" {
return fmt.Errorf("stopContainersAndRun: couldn't find service with name %s", serviceName)
}
serviceMatch.Spec.TaskTemplate.ForceUpdate = 1
_, err := s.cli.ServiceUpdate(
2021-08-29 18:26:40 +02:00
context.Background(), serviceMatch.ID,
serviceMatch.Version, serviceMatch.Spec, types.ServiceUpdateOptions{},
2021-08-22 15:04:44 +02:00
)
if err != nil {
restartErrors = append(restartErrors, err)
}
}
}
if len(restartErrors) != 0 {
return fmt.Errorf(
"stopContainersAndRun: %d error(s) restarting containers and services: %w",
len(restartErrors),
join(restartErrors...),
)
}
2021-08-23 18:46:49 +02:00
s.logger.Infof(
"Restarted %d container(s) and the matching service(s).",
len(stoppedContainers),
)
return nil
}, stopError
2021-08-21 19:26:42 +02:00
}
// takeBackup creates a tar archive of the configured backup location and
// saves it to disk.
2021-08-21 19:26:42 +02:00
func (s *script) takeBackup() error {
2021-08-22 22:02:19 +02:00
s.file = timeutil.Strftime(&s.start, s.file)
if err := targz.Compress(s.c.BackupSources, s.file); err != nil {
2021-08-21 21:26:27 +02:00
return fmt.Errorf("takeBackup: error compressing backup folder: %w", err)
}
s.logger.Infof("Created backup of `%s` at `%s`.", s.c.BackupSources, s.file)
2021-08-21 21:26:27 +02:00
return nil
2021-08-21 19:26:42 +02:00
}
// encryptBackup encrypts the backup file using PGP and the configured passphrase.
// In case no passphrase is given it returns early, leaving the backup file
2021-08-29 10:23:25 +02:00
// untouched.
2021-08-21 19:26:42 +02:00
func (s *script) encryptBackup() error {
if s.c.GpgPassphrase == "" {
2021-08-21 19:26:42 +02:00
return nil
}
2021-08-24 11:39:27 +02:00
defer os.Remove(s.file)
2021-08-22 14:44:33 +02:00
gpgFile := fmt.Sprintf("%s.gpg", s.file)
outFile, err := os.Create(gpgFile)
defer outFile.Close()
if err != nil {
return fmt.Errorf("encryptBackup: error opening out file: %w", err)
}
_, name := path.Split(s.file)
dst, err := openpgp.SymmetricallyEncrypt(outFile, []byte(s.c.GpgPassphrase), &openpgp.FileHints{
2021-08-22 14:44:33 +02:00
IsBinary: true,
FileName: name,
}, nil)
defer dst.Close()
2021-08-22 14:44:33 +02:00
if err != nil {
return fmt.Errorf("encryptBackup: error encrypting backup file: %w", err)
}
src, err := os.Open(s.file)
2021-08-22 14:44:33 +02:00
if err != nil {
return fmt.Errorf("encryptBackup: error opening backup file %s: %w", s.file, err)
2021-08-22 14:44:33 +02:00
}
if _, err := io.Copy(dst, src); err != nil {
return fmt.Errorf("encryptBackup: error writing ciphertext to file: %w", err)
2021-08-22 14:44:33 +02:00
}
s.file = gpgFile
2021-08-23 07:07:44 +02:00
s.logger.Infof("Encrypted backup using given passphrase, saving as `%s`.", s.file)
2021-08-22 14:44:33 +02:00
return nil
2021-08-21 19:26:42 +02:00
}
// copyBackup makes sure the backup file is copied to both local and remote locations
// as per the given configuration.
2021-08-21 19:26:42 +02:00
func (s *script) copyBackup() error {
2021-08-21 21:26:27 +02:00
_, name := path.Split(s.file)
2021-08-29 18:26:40 +02:00
if s.mc != nil {
_, err := s.mc.FPutObject(context.Background(), s.c.AwsS3BucketName, name, s.file, minio.PutObjectOptions{
2021-08-21 21:26:27 +02:00
ContentType: "application/tar+gzip",
})
if err != nil {
return fmt.Errorf("copyBackup: error uploading backup to remote storage: %w", err)
}
2021-08-29 10:23:25 +02:00
s.logger.Infof("Uploaded a copy of backup `%s` to bucket `%s`.", s.file, s.c.AwsS3BucketName)
2021-08-21 21:26:27 +02:00
}
2021-08-22 15:04:44 +02:00
if _, err := os.Stat(s.c.BackupArchive); !os.IsNotExist(err) {
if err := copy(s.file, path.Join(s.c.BackupArchive, name)); err != nil {
return fmt.Errorf("copyBackup: error copying file to local archive: %w", err)
2021-08-21 21:26:27 +02:00
}
2021-08-29 10:23:25 +02:00
s.logger.Infof("Stored copy of backup `%s` in local archive `%s`.", s.file, s.c.BackupArchive)
2021-08-21 21:26:27 +02:00
}
return nil
2021-08-21 19:26:42 +02:00
}
2021-08-24 11:39:27 +02:00
// removeArtifacts removes the backup file from disk.
func (s *script) removeArtifacts() error {
2021-08-21 21:26:27 +02:00
if err := os.Remove(s.file); err != nil {
2021-08-24 11:39:27 +02:00
return fmt.Errorf("removeArtifacts: error removing file: %w", err)
2021-08-21 21:26:27 +02:00
}
2021-08-24 11:39:27 +02:00
s.logger.Info("Removed local artifacts.")
2021-08-21 21:26:27 +02:00
return nil
2021-08-21 19:26:42 +02:00
}
// pruneOldBackups rotates away backups from local and remote storages using
// the given configuration. In case the given configuration would delete all
// backups, it does nothing instead.
func (s *script) pruneOldBackups() error {
if s.c.BackupRetentionDays < 0 {
2021-08-21 19:26:42 +02:00
return nil
}
2021-08-22 22:02:19 +02:00
if s.c.BackupPruningLeeway != 0 {
s.logger.Infof("Sleeping for %s before pruning backups.", s.c.BackupPruningLeeway)
time.Sleep(s.c.BackupPruningLeeway)
}
2021-08-24 09:15:43 +02:00
deadline := time.Now().AddDate(0, 0, -int(s.c.BackupRetentionDays))
2021-08-22 14:00:21 +02:00
2021-08-29 18:26:40 +02:00
if s.mc != nil {
candidates := s.mc.ListObjects(context.Background(), s.c.AwsS3BucketName, minio.ListObjectsOptions{
2021-08-22 15:04:44 +02:00
WithMetadata: true,
Prefix: s.c.BackupPruningPrefix,
2021-08-22 15:04:44 +02:00
})
var matches []minio.ObjectInfo
2021-08-22 16:41:06 +02:00
var lenCandidates int
2021-08-22 15:04:44 +02:00
for candidate := range candidates {
2021-08-22 16:41:06 +02:00
lenCandidates++
if candidate.Err != nil {
2021-08-23 18:46:49 +02:00
return fmt.Errorf(
"pruneOldBackups: error looking up candidates from remote storage: %w",
candidate.Err,
)
2021-08-22 16:41:06 +02:00
}
2021-08-22 15:04:44 +02:00
if candidate.LastModified.Before(deadline) {
matches = append(matches, candidate)
}
}
2021-08-22 16:41:06 +02:00
if len(matches) != 0 && len(matches) != lenCandidates {
2021-08-22 15:04:44 +02:00
objectsCh := make(chan minio.ObjectInfo)
go func() {
2021-08-22 16:41:06 +02:00
for _, match := range matches {
objectsCh <- match
2021-08-22 15:04:44 +02:00
}
close(objectsCh)
2021-08-22 15:04:44 +02:00
}()
2021-08-29 18:26:40 +02:00
errChan := s.mc.RemoveObjects(context.Background(), s.c.AwsS3BucketName, objectsCh, minio.RemoveObjectsOptions{})
var removeErrors []error
2021-08-22 15:04:44 +02:00
for result := range errChan {
if result.Err != nil {
removeErrors = append(removeErrors, result.Err)
2021-08-22 15:04:44 +02:00
}
}
if len(removeErrors) != 0 {
2021-08-22 15:04:44 +02:00
return fmt.Errorf(
2021-08-22 22:02:19 +02:00
"pruneOldBackups: %d error(s) removing files from remote storage: %w",
len(removeErrors),
join(removeErrors...),
2021-08-22 15:04:44 +02:00
)
}
2021-08-22 16:41:06 +02:00
s.logger.Infof(
2021-08-26 12:50:22 +02:00
"Pruned %d out of %d remote backup(s) as their age exceeded the configured retention period of %d days.",
2021-08-22 16:41:06 +02:00
len(matches),
lenCandidates,
2021-08-26 12:50:22 +02:00
s.c.BackupRetentionDays,
2021-08-22 16:41:06 +02:00
)
} else if len(matches) != 0 && len(matches) == lenCandidates {
2021-08-22 19:37:48 +02:00
s.logger.Warnf(
2021-08-23 18:46:49 +02:00
"The current configuration would delete all %d remote backup copies.",
2021-08-22 19:37:48 +02:00
len(matches),
)
2021-08-23 18:46:49 +02:00
s.logger.Warn("Refusing to do so, please check your configuration.")
2021-08-22 16:41:06 +02:00
} else {
2021-08-22 22:02:19 +02:00
s.logger.Infof("None of %d remote backup(s) were pruned.", lenCandidates)
2021-08-22 15:04:44 +02:00
}
}
2021-08-22 15:04:44 +02:00
if _, err := os.Stat(s.c.BackupArchive); !os.IsNotExist(err) {
2021-08-22 15:04:44 +02:00
candidates, err := filepath.Glob(
path.Join(s.c.BackupArchive, fmt.Sprintf("%s*", s.c.BackupPruningPrefix)),
2021-08-22 14:00:21 +02:00
)
if err != nil {
2021-08-22 15:04:44 +02:00
return fmt.Errorf(
"pruneOldBackups: error looking up matching files, starting with: %w", err,
)
2021-08-22 14:00:21 +02:00
}
var matches []string
2021-08-22 15:04:44 +02:00
for _, candidate := range candidates {
fi, err := os.Stat(candidate)
2021-08-22 14:00:21 +02:00
if err != nil {
2021-08-22 15:04:44 +02:00
return fmt.Errorf(
"pruneOldBackups: error calling stat on file %s: %w",
candidate,
err,
)
2021-08-22 14:00:21 +02:00
}
if fi.ModTime().Before(deadline) {
matches = append(matches, candidate)
2021-08-22 14:00:21 +02:00
}
}
2021-08-22 16:41:06 +02:00
if len(matches) != 0 && len(matches) != len(candidates) {
var removeErrors []error
2021-08-22 15:04:44 +02:00
for _, candidate := range matches {
if err := os.Remove(candidate); err != nil {
removeErrors = append(removeErrors, err)
2021-08-22 14:00:21 +02:00
}
}
if len(removeErrors) != 0 {
2021-08-22 15:04:44 +02:00
return fmt.Errorf(
2021-08-22 22:02:19 +02:00
"pruneOldBackups: %d error(s) deleting local files, starting with: %w",
len(removeErrors),
join(removeErrors...),
2021-08-22 15:04:44 +02:00
)
}
2021-08-22 16:41:06 +02:00
s.logger.Infof(
2021-08-26 12:50:22 +02:00
"Pruned %d out of %d local backup(s) as their age exceeded the configured retention period of %d days.",
2021-08-22 16:41:06 +02:00
len(matches),
len(candidates),
2021-08-26 12:50:22 +02:00
s.c.BackupRetentionDays,
2021-08-22 16:41:06 +02:00
)
} else if len(matches) != 0 && len(matches) == len(candidates) {
2021-08-22 19:37:48 +02:00
s.logger.Warnf(
2021-08-23 18:46:49 +02:00
"The current configuration would delete all %d local backup copies.",
2021-08-22 19:37:48 +02:00
len(matches),
)
2021-08-23 18:46:49 +02:00
s.logger.Warn("Refusing to do so, please check your configuration.")
2021-08-22 16:41:06 +02:00
} else {
2021-08-22 22:02:19 +02:00
s.logger.Infof("None of %d local backup(s) were pruned.", len(candidates))
2021-08-22 14:00:21 +02:00
}
}
return nil
}
2021-08-29 10:23:25 +02:00
// must exits the script run non-zero and prematurely in case the given error
// is non-nil.
2021-08-22 19:37:48 +02:00
func (s *script) must(err error) {
if err != nil {
2021-08-24 11:39:27 +02:00
s.logger.Fatalf("Fatal error running backup: %s", err)
2021-08-21 21:26:27 +02:00
}
}
// lock opens a lockfile at the given location, keeping it locked until the
// caller invokes the returned release func. When invoked while the file is
// still locked the function panics.
func lock(lockfile string) func() error {
2021-08-23 18:46:49 +02:00
fileLock := flock.New(lockfile)
acquired, err := fileLock.TryLock()
if err != nil {
panic(err)
}
2021-08-23 18:46:49 +02:00
if !acquired {
panic("unable to acquire file lock")
}
2021-08-23 18:46:49 +02:00
return fileLock.Unlock
}
// copy writes the content of the file located at `src` to a file at `dst`,
// creating or truncating `dst` in the process. An error closing the
// destination file is reported unless copying has already failed before.
func copy(src, dst string) error {
	source, err := os.Open(src)
	if err != nil {
		return err
	}
	defer source.Close()

	target, err := os.Create(dst)
	if err != nil {
		return err
	}

	// The destination is closed in any case, a copy error takes precedence.
	_, copyErr := io.Copy(target, source)
	closeErr := target.Close()
	if copyErr != nil {
		return copyErr
	}
	return closeErr
}
// join takes a list of errors and joins them into a single error.
//
// A single argument is returned as is. Otherwise the messages of all non-nil
// errors are combined into one new error of the form "[msg1, msg2]". When
// there is no non-nil error to report, join returns nil so that the result
// can safely be checked against nil.
func join(errs ...error) error {
	if len(errs) == 1 {
		return errs[0]
	}

	msgs := make([]string, 0, len(errs))
	for _, err := range errs {
		if err == nil {
			continue
		}
		msgs = append(msgs, err.Error())
	}
	if len(msgs) == 0 {
		// Nothing failed: do not fabricate an error reading "[]".
		return nil
	}
	return errors.New("[" + strings.Join(msgs, ", ") + "]")
}
// buffer wraps the given io.Writer so that everything written to the
// returned writer ends up in the original target as well as in the returned
// buffer.
func buffer(w io.Writer) (io.Writer, *bytes.Buffer) {
	tee := &bufferingWriter{writer: w}
	return tee, &tee.buf
}
// bufferingWriter is an io.Writer that keeps a copy of everything written
// to it in buf while passing the data on to writer.
type bufferingWriter struct {
	buf    bytes.Buffer
	writer io.Writer
}

// Write stores p in the internal buffer first and then hands it on to the
// wrapped writer, returning whatever the wrapped writer reports. In case
// buffering fails the wrapped writer is not invoked.
func (b *bufferingWriter) Write(p []byte) (int, error) {
	buffered, bufErr := b.buf.Write(p)
	if bufErr != nil {
		return buffered, fmt.Errorf("bufferingWriter: error writing to buffer: %w", bufErr)
	}
	return b.writer.Write(p)
}