#!/usr/bin/env python3

__desc__ = "Incremental backup utility with optional upload to Amazon S3"
__author__ = "Juanjo Alvarez <juanjo@juanjoalvarez.net>"
__license__ = "MIT"

import os, shutil, sys, subprocess, shlex
from traceback import format_exc

try:
    from typing import List, Any, Optional
except ImportError:
    # Fallback for interpreters without the typing module; the dummy object
    # supports subscription (e.g. Optional[str]) so annotations still evaluate
    class _DummyType:
        def __getitem__(self, _item):
            return object
    List = Any = Optional = _DummyType()  # type: ignore

from functools import wraps
from time import time
- """
- TODO:
- - Support for mounting and unmounting devices before/after the backup
- - Bandwith and IO limiting options, check:
- http://unix.stackexchange.com/questions/48138/how-to-throttle-per-process-i-o-to-a-max-limit0/
- - setup.py
- - Some examples of usage and integration on the README.md
- """
EMAIL_TEXTS = []  # type: List[str]


def message(text: str, email: bool = True) -> None:
    global EMAIL_TEXTS

    print(text)
    sys.stdout.flush()

    if c.EMAIL_REPORT and email:
        EMAIL_TEXTS.append(text)
def timeit(text: str):
    def decorator(func: Any) -> Any:
        @wraps(func)
        def wrapper(*args, **kwargs):
            start = time()
            res = func(*args, **kwargs)
            seconds = int(time() - start)

            m, s = divmod(seconds, 60)
            h, m = divmod(m, 60)
            elapsed = "%d:%02d:%02d" % (h, m, s)

            message('Time for {name}: {time}'.format(name=text, time=elapsed))
            return res
        return wrapper
    return decorator
def find_config() -> Optional[str]:
    """
    Find the path to the configuration file. Priority order is:

    1. config.py in this file's directory
    2. ~/.config/plain_inc_bak/config.py
    3. ~/.plain_inc_bak_config.py
    4. /etc/plain_inc_bak/config.py

    Config files are *not* flattened; only the first one found will be parsed.
    """
    op = os.path
    curdir = op.join(op.dirname(op.abspath(__file__)), 'config.py')
    userconfig = op.expanduser('~/.config/plain_inc_bak/config.py')
    userroot = op.expanduser('~/.plain_inc_bak_config.py')
    etc = '/etc/plain_inc_bak/config.py'

    for d in (curdir, userconfig, userroot, etc):
        if op.exists(d):
            return d

    return None
config_file_path = find_config()

if config_file_path:
    import importlib.util as imputil
    spec = imputil.spec_from_file_location("c", config_file_path)
    c = imputil.module_from_spec(spec)
    spec.loader.exec_module(c)
else:
    # No config file was found: use an empty placeholder class so the
    # getattr(c, ..., default) lookups below still work
    class c: pass
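# A minimal sketch of what a config.py picked up by find_config() might contain.
# The setting names match the ones this script reads via getattr() below; the
# concrete values are made-up placeholders, not shipped defaults.
#
#   ORIGIN = '/home'                      # directory tree to back up
#   BACKUPS_DIR = '/mnt/backups'          # where backup.0, backup.1, ... are kept
#   BACKUP_BASENAME = 'backup'
#   MAX_BACKUPS = 7
#   EXCLUDE = ['/home/someuser/.cache']   # subtrees left out of the rsync
#   DRY_RUN = False
#
#   S3_UPLOAD_ENABLED = False
#   S3_BUCKET = 'my-backups-bucket'
#   S3_ACCESS_KEY = 'AKIAEXAMPLEKEY'
#   S3_SECRET_KEY = 'examplesecret'
#   S3_GPG_ENCRYPT = False
#   S3_GPG_PASSPHRASE = 'examplepassphrase'
#
#   EMAIL_REPORT = False
#   EMAIL_PROGRAM = '/usr/sbin/sendmail'
#   EMAIL_FROM = 'backups@example.com'
#   EMAIL_DEST = 'admin@example.com'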
def parse_arguments() -> None:
    def get_best_compressor() -> str:
        from shutil import which

        # I'm only considering relatively fast compressors. pigz
        # is fast and uses a lot less memory so it's my favorite
        for compressor in ('pigz', 'pbzip2', 'plzip', 'gzip'):
            path = which(compressor)
            if path:
                return path
        else:
            raise Exception('Could not find any suitable compressor')
    # end inner function
    import argparse
    parser = argparse.ArgumentParser(description=__desc__)

    parser.add_argument('-o', '--origin_dir', default=getattr(c, 'ORIGIN', None),
            help='Origin directory to backup (will include subdirs)')
    parser.add_argument('-B', '--backups_dir', default=getattr(c, 'BACKUPS_DIR', None),
            help='Directory where the backups will be stored')
    parser.add_argument('-n', '--backup_basename', default=getattr(c, 'BACKUP_BASENAME', None),
            help='First part of the backup directory names (numbers will be appended to it)')
    parser.add_argument('-m', '--max_backups', type=int, default=getattr(c, 'MAX_BACKUPS', 7),
            help='Maximum number of incremental backups to keep')
    parser.add_argument('-x', '--exclude_dirs', type=lambda s: s.split(','),
            default=getattr(c, 'EXCLUDE', []),
            help='Comma separated list of directories to exclude from the backup. ' +
                 'This option will replace any other configured exclude directories')
    parser.add_argument('-u', '--upload_s3', action='store_true',
            help='Enable uploading to Amazon S3 of the most recent backup')
    parser.add_argument('-b', '--s3_bucket', default=getattr(c, 'S3_BUCKET', None),
            help='Name of the S3 bucket to upload to')
    parser.add_argument('-e', '--s3_gpg_encrypt', action='store_true',
            help='Encrypt the backup with GPG before uploading to S3')
    parser.add_argument('-p', '--s3_gpg_pass', default=getattr(c, 'S3_GPG_PASSPHRASE', None),
            help='GPG passphrase to use for encrypted uploads')
    parser.add_argument('-a', '--s3_akey', default=getattr(c, 'S3_ACCESS_KEY', None),
            help='S3 Access Key')
    parser.add_argument('-s', '--s3_secret', default=getattr(c, 'S3_SECRET_KEY', None),
            help='S3 Secret Key')
    parser.add_argument('-E', '--email_report', action='store_true',
            help='Send an email report')
    parser.add_argument('-P', '--email_program',
            default=getattr(c, 'EMAIL_PROGRAM', '/usr/sbin/sendmail'),
            help='Sendmail-style program to use for sending the email')
    parser.add_argument('-f', '--email_from', default=getattr(c, 'EMAIL_FROM', None),
            help='"From" field to use in the report email')
    parser.add_argument('-d', '--email_dest', default=getattr(c, 'EMAIL_DEST', None),
            help='Address where the report email will be sent')
    parser.add_argument('-D', '--dry_run', action='store_true',
            help="Don't really compress or upload anything")
    parser.add_argument('-C', '--compressor', default=get_best_compressor(),
            help='Program for compressing backups before uploading. If missing, a ' +
                 'program will be automatically selected')
    parser.add_argument('--nogpg', action='store_true',
            help='Avoid doing GPG encryption when it would normally be configured to do so')
    parser.add_argument('--norotate', action='store_true',
            help='Avoid rotating the backups when it would normally be configured to do so')
    parser.add_argument('--norsync', action='store_true',
            help='Avoid doing the rsync to the .0 backup directory when it would normally '
                 'be configured to do so')

    args = parser.parse_args()
    # Command-line options win over the config file; getattr() defaults avoid
    # AttributeError when a setting (or the whole config file) is missing
    c.S3_UPLOAD_ENABLED = args.upload_s3 or getattr(c, 'S3_UPLOAD_ENABLED', False)
    c.S3_BUCKET = args.s3_bucket
    c.S3_ACCESS_KEY = args.s3_akey
    c.S3_SECRET_KEY = args.s3_secret
    c.S3_GPG_ENCRYPT = args.s3_gpg_encrypt or getattr(c, 'S3_GPG_ENCRYPT', False)
    c.S3_GPG_PASSPHRASE = args.s3_gpg_pass
    c.EMAIL_REPORT = args.email_report or getattr(c, 'EMAIL_REPORT', False)
    c.EMAIL_PROGRAM = args.email_program
    c.EMAIL_FROM = args.email_from
    c.EMAIL_DEST = args.email_dest
    c.DRY_RUN = args.dry_run or getattr(c, 'DRY_RUN', False)
    c.ORIGIN = args.origin_dir
    c.BACKUPS_DIR = args.backups_dir
    c.BACKUP_BASENAME = args.backup_basename or 'backup'
    c.MAX_BACKUPS = args.max_backups
    c.EXCLUDE = args.exclude_dirs
    c.COMPRESSOR = args.compressor
    c.NOGPG = args.nogpg
    c.NOROTATE = args.norotate
    c.NORSYNC = args.norsync
    def printerror(msg):
        print('Error: {}'.format(msg), file=sys.stderr)
        parser.print_help()
        exit(1)

    if c.S3_UPLOAD_ENABLED and not (c.S3_BUCKET and c.S3_ACCESS_KEY and c.S3_SECRET_KEY):
        printerror('enabled S3 uploads require the bucket, access_key and secret_key options')

    if c.S3_GPG_ENCRYPT and not c.S3_GPG_PASSPHRASE:
        printerror('gpg encrypting needs a gpg passphrase')

    if c.EMAIL_REPORT and not (c.EMAIL_PROGRAM and c.EMAIL_FROM and c.EMAIL_DEST):
        printerror('enabled email reports require the program, from and destination options')

    if not c.ORIGIN:
        printerror('you need to configure an origin directory')

    if not c.BACKUPS_DIR:
        printerror('you need to configure a backups destination directory')
@timeit(text='RSync to the most recent directory')
def rsync_first(zerodir):
    # Now do the real backup with rsync
    excludeparams = ['--exclude={}/*'.format(i) for i in c.EXCLUDE]
    rsynccmd = ['/usr/bin/rsync', '-azAXSH', '--delete', *excludeparams, c.ORIGIN, zerodir]

    message('Running rsync with:\n{}'.format(' '.join(rsynccmd)))
    subprocess.check_call(rsynccmd)
    message('Rsync completed successfully')
@timeit(text='Backup compression for upload')
def compress_backup(dirpath: str) -> str:
    outpath = dirpath + '.tar.gz'
    message('Compressing directory {} to {}'.format(dirpath, outpath))

    # Remove the leading '/'; we'll instruct tar to change to the root directory
    # with the -C / option thus avoiding the "removing leading /" message
    if dirpath.startswith('/'):
        dirpath = dirpath[1:]

    compressor = c.COMPRESSOR
    cmd = "tar c --warning='no-file-ignored' --directory=/ {dirpath}|{compressor} > {outpath}"\
          .format(**locals())
    print(cmd)

    if not c.DRY_RUN:
        output = subprocess.check_output(cmd, shell=True).decode()
        message(output)

    return outpath
@timeit(text='GPG encrypting the backup for upload')
def gpg_encrypt_file(filepath: str) -> str:
    gpgpath = filepath + '.gpg'

    if os.path.exists(gpgpath):
        message('Warning: deleting previously existing GPG file: {}'.format(gpgpath))
        os.unlink(gpgpath)

    cmd = 'gpg --batch --symmetric --cipher-algo AES256 --passphrase-fd 0 {}'.format(filepath)
    message('Encrypting backup with command: {}'.format(cmd))

    if not c.DRY_RUN:
        p = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE,
                             stdin=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, stderr = p.communicate(c.S3_GPG_PASSPHRASE.encode())

        if p.returncode != 0:
            raise Exception('Could not encrypt file: {}'.format(stdout + stderr))

        message('File encrypted successfully')

    return gpgpath
@timeit(text='Uploading to S3')
def upload_s3(dirpath: str) -> None:
    import boto3, time

    real_filepath = ''
    compressed_filepath = compress_backup(dirpath)

    if c.S3_GPG_ENCRYPT and not c.NOGPG:
        real_filepath = gpg_encrypt_file(compressed_filepath)
    real_filepath = compressed_filepath if not real_filepath else real_filepath

    datepart = time.strftime("%Y%m%d%H%M%S")
    remote_filename = os.path.split(real_filepath)[1] + '.' + datepart

    message('Uploading file to Amazon S3')

    if not c.DRY_RUN:
        # On a dry run the compressed/encrypted file was never created, so only
        # open and upload it when really running
        with open(real_filepath, 'rb') as data:
            s3 = boto3.client('s3', aws_access_key_id=c.S3_ACCESS_KEY,
                              aws_secret_access_key=c.S3_SECRET_KEY)
            s3.upload_fileobj(data, c.S3_BUCKET, remote_filename)

    if not c.DRY_RUN:
        if c.S3_GPG_ENCRYPT:
            # Remove the local encrypted file
            os.unlink(real_filepath)
        os.unlink(compressed_filepath)

    message('File uploaded to S3 bucket "{}" as key "{}"'.
            format(c.S3_BUCKET, remote_filename))
def send_mail(subject: str, content: str) -> None:
    # The machine won't have a real SMTP server, only an MDA, and the script won't
    # have access to external SMTP servers, so a sendmail-style binary is used for delivery
    from textwrap import dedent

    from_ = c.EMAIL_FROM
    to = c.EMAIL_DEST

    real_content = dedent('''\
        From: {from_}
        To: {to}
        Subject: {subject}

        {content}
        ''').format(**locals())

    sendmail = subprocess.Popen([c.EMAIL_PROGRAM, to], stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE, bufsize=1)
    stdout, stderr = sendmail.communicate(bytearray(real_content, 'utf-8'))
@timeit(text='Rotating backups')
def rotate_backups(backup_dirs: List[str]) -> None:
    backup_nums = sorted([int(i.split('.')[1]) for i in backup_dirs])
    backup_nums.reverse()

    for i in backup_nums:
        full_dirname = os.path.join(c.BACKUPS_DIR, '{}.{}'.format(c.BACKUP_BASENAME, i))

        if i >= c.MAX_BACKUPS:
            # Delete the oldest ones
            message('Deleting {}'.format(full_dirname))
            if not c.DRY_RUN:
                shutil.rmtree(full_dirname)
        else:
            # Rename to the greater number except for the 0 which will be
            # copied with hard links
            inc_dirname = os.path.join(c.BACKUPS_DIR, '{}.{}'.format(c.BACKUP_BASENAME, i + 1))
            dir_params = (full_dirname, inc_dirname)  # DRY

            if i == 0:
                message('Hardlink-copying "{}" => "{}"'.format(*dir_params))
                if not c.DRY_RUN:
                    ret = os.system('cp -al {} {}'.format(*dir_params))
                    if ret != 0:
                        raise Exception('cp -al returned error {}!'.format(ret))
            else:
                message('Moving "{}" => "{}"'.format(*dir_params))
                if not c.DRY_RUN:
                    shutil.move(*dir_params)
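# Illustrative on-disk layout after several runs, assuming the default
# BACKUP_BASENAME of 'backup' and MAX_BACKUPS of 7 (names are examples only):
#
#   <BACKUPS_DIR>/backup.0   newest snapshot: the rsync target, hardlink-copied to .1
#   <BACKUPS_DIR>/backup.1   previous snapshot, mostly hard links to unchanged files
#   ...
#   <BACKUPS_DIR>/backup.7   oldest snapshot, deleted at the start of the next rotation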
def main() -> int:
    try:
        parse_arguments()

        if not os.path.exists(c.BACKUPS_DIR):
            raise Exception('Missing backups dir: {}'.format(c.BACKUPS_DIR))

        backup_dirs = [i for i in os.listdir(c.BACKUPS_DIR)
                       if i.startswith(c.BACKUP_BASENAME) and
                       os.path.isdir(os.path.join(c.BACKUPS_DIR, i))]

        if backup_dirs and not c.NOROTATE:
            rotate_backups(backup_dirs)

        zerodir = os.path.join(c.BACKUPS_DIR, '{}.0'.format(c.BACKUP_BASENAME))

        if not c.DRY_RUN and not c.NORSYNC:
            rsync_first(zerodir)

        if c.S3_UPLOAD_ENABLED:
            upload_s3(zerodir)
    except Exception as e:
        backup_completed = False
        message(format_exc())
        if hasattr(e, 'output'):
            message(e.output)  # type: ignore
    else:
        backup_completed = True

    if backup_completed:
        email_subject = '[BACKUP SUCCESS] Backup completed'
    else:
        email_subject = '[BACKUP FAILED] Backup problems!'

    if getattr(c, 'EMAIL_REPORT', False):
        send_mail(email_subject, '\n'.join(EMAIL_TEXTS))

    return 0 if backup_completed else 1


if __name__ == '__main__':
    sys.exit(main())
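# Illustrative invocations (the script filename "plain_inc_bak.py" and all paths,
# bucket names and addresses below are placeholders, not values used by the tool):
#
#   ./plain_inc_bak.py -o /home -B /mnt/backups -m 7
#   ./plain_inc_bak.py -o /home -B /mnt/backups -u -b my-bucket -a ACCESS_KEY -s SECRET_KEY
#   ./plain_inc_bak.py -D -E -f backups@example.com -d admin@example.com
#
# A typical deployment would run it nightly from root's crontab, e.g.:
#   30 3 * * * /usr/local/bin/plain_inc_bak.py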