plain_inc_bak.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371
  1. #!/usr/bin/env python3
  2. __desc__ = "Incremental backup utility with optional upload to Amazon S3"
  3. __autor__ = "Juanjo Alvarez <juanjo@juanjoalvarez.net>"
  4. __license__ = "MIT"
  5. import os, shutil, sys, subprocess, shlex
  6. from traceback import format_exc
  7. try:
  8. from typing import List, Any, Optional
  9. except ImportError:
  10. List = list
  11. Any = object
  12. Optional = object
  13. from functools import wraps
  14. from time import time
  15. """
  16. TODO:
  17. - Support for mounting and unmounting devices before/after the backup
  18. - Bandwidth and IO limiting options, check:
  19. http://unix.stackexchange.com/questions/48138/how-to-throttle-per-process-i-o-to-a-max-limit0/
  20. - setup.py
  21. - Some examples of usage and integration on the README.md
  22. """
  23. EMAIL_TEXTS = [] # type: List[str]
  24. def message(text: str, email: bool = True) -> None:
  25. global EMAIL_TEXTS
  26. print(text)
  27. sys.stdout.flush()
  28. if c.EMAIL_REPORT and email:
  29. EMAIL_TEXTS.append(text)
  30. def timeit(text: str):
  31. def decorator(func: Any) -> Any:
  32. @wraps(func)
  33. def wrapper(*args, **kwargs):
  34. start = time()
  35. res = func(*args, **kwargs)
  36. seconds = int(time() - start)
  37. m, s = divmod(seconds, 60)
  38. h, m = divmod(m, 60)
  39. elapsed = "%d:%02d:%02d" % (h, m, s)
  40. message('Time for {name}: {time}'.format(name=text, time=elapsed))
  41. return res
  42. return wrapper
  43. return decorator
  44. def find_config() -> Optional[str]:
  45. """
  46. Find the path to the configuration files. Priority order is:
  47. 1. (this file dir)
  48. 2. ~/.config/plain_inc_bak/config.py
  49. 3. ~/.plain_inc_bak_config.py
  50. 3. /etc/plain_inc_bak/config.pykwargs)
  51. Config files are *not* flattened, only one will be parsed
  52. """
  53. op = os.path
  54. curdir = op.join(op.dirname(op.abspath(__file__)), 'config.py')
  55. userconfig = op.expanduser('~/.config/plain_inc_bak/config.py')
  56. userroot = op.expanduser('~/.plain_inc_bak_config.py')
  57. etc = '/etc/plain_inc_bak/config.py'
  58. for d in (curdir, userconfig, userroot, etc):
  59. if op.exists(d):
  60. return d
  61. return None
  62. config_file_path = find_config()
  63. if config_file_path:
  64. import importlib.util as imputil
  65. spec = imputil.spec_from_file_location("c", config_file_path)
  66. c = imputil.module_from_spec(spec)
  67. spec.loader.exec_module(c)
  68. else:
  69. class c: pass
  70. def parse_arguments() -> Any:
  71. def get_best_compressor() -> str:
  72. from shutil import which
  73. # I'm only considering relatively fast compressors. pigz
  74. # is fast and uses a lot less memory so its my favorite
  75. for compressor in ('pigz', 'pbzip2', 'plzip', 'gzip'):
  76. path = which(compressor)
  77. if path:
  78. return path
  79. else:
  80. raise Exception('Could not find any suitable compressor')
  81. # end inner function
  82. import argparse
  83. parser = argparse.ArgumentParser(description=__desc__)
  84. parser.add_argument('-o', '--origin_dir', default=getattr(c,'ORIGIN', None),
  85. help='Origin directory to backup (will include subdirs')
  86. parser.add_argument('-B', '--backups_dir', default=getattr(c,'BACKUPS_DIR', None),
  87. help='Directory where the backups will be stored')
  88. parser.add_argument('-n', '--backup_basename', default=getattr(c,'BACKUP_BASENAME', None),
  89. help='First part of the backup directory names (numbers will be appended to it)')
  90. parser.add_argument('-m', '--max_backups', type=int, default=getattr(c,'MAX_BACKUPS', 7),
  91. help='Maximum number of incremental backups to keep')
  92. parser.add_argument('-x', '--exclude_dirs', type=list, default=getattr(c,'EXCLUDE', []),
  93. help='Comma separated list of directories to exclude from the backup.' +
  94. 'This option will remove any other configured exclude diretories')
  95. parser.add_argument('-u', '--upload_s3', action='store_true',
  96. help='Enable uploading to Amazon S3 of the most recent backup')
  97. parser.add_argument('-b', '--s3_bucket', default=getattr(c, 'S3_BUCKET', None),
  98. help='Name of the S3 bucket to upload')
  99. parser.add_argument('-e', '--s3_gpg_encrypt', action='store_true',
  100. help='Encrypt the backup with GPG before uploading to S3')
  101. parser.add_argument('-p', '--s3_gpg_pass', default=getattr(c, 'S3_GPG_PASSPHRASE', None),
  102. help='GPG passphrase to use for encrypted uploads')
  103. parser.add_argument('-a', '--s3_akey', default=getattr(c,'S3_ACCESS_KEY', None),
  104. help='S3 Access Key')
  105. parser.add_argument('-s', '--s3_secret', default=getattr(c,'S3_SECRET_KEY', None),
  106. help='S3 Secret Key')
  107. parser.add_argument('-E', '--email_report', action='store_true',
  108. help='Send an email report')
  109. parser.add_argument('-P', '--email_program',
  110. default=getattr(c,'EMAIL_PROGRAM', '/usr/sbin/sendmail'),
  111. help='Sendmail-style program to use for sending the email')
  112. parser.add_argument('-f', '--email_from', default=getattr(c,'EMAIL_FROM', None),
  113. help='"From" field to use in the report email')
  114. parser.add_argument('-d', '--email_dest', default=getattr(c,'EMAIL_DEST', None),
  115. help='Address where the report email will be sent')
  116. parser.add_argument('-D', '--dry_run', action='store_true',
  117. help='Dont really compress or upload anything')
  118. parser.add_argument('-C', '--compressor', default=get_best_compressor(),
  119. help='Program for compressing backups before uploading. If missing a ' +
  120. 'program will be automatically selected')
  121. parser.add_argument('--nogpg', action='store_true',
  122. help='Avoid doing GPG compression when it would normally be configured to do so')
  123. parser.add_argument('--norotate', action='store_true',
  124. help='Avoid rotating the backups when it would normally be configured to do so')
  125. parser.add_argument('--norsync', action='store_true',
  126. help='Avoid doing the rsync to the .0 backup directory when it would normally '
  127. 'be configured to do so')
  128. args = parser.parse_args()
  129. c.S3_UPLOAD_ENABLED = args.upload_s3 or c.S3_UPLOAD_ENABLED
  130. c.S3_BUCKET = args.s3_bucket
  131. c.S3_ACCESS_KEY = args.s3_akey
  132. c.S3_SECRET_KEY = args.s3_secret
  133. c.S3_GPG_ENCRYPT = args.s3_gpg_encrypt or c.S3_GPG_ENCRYPT
  134. c.EMAIL_REPORT = args.email_report or c.EMAIL_REPORT
  135. c.EMAIL_PROGRAM = args.email_program
  136. c.EMAIL_FROM = args.email_from
  137. c.EMAIL_DEST = args.email_dest
  138. c.DRY_RUN = args.dry_run or c.DRY_RUN
  139. c.ORIGIN = args.origin_dir
  140. c.BACKUPS_DIR = args.backups_dir
  141. c.BACKUP_BASENAME = args.backup_basename or 'backup'
  142. c.MAX_BACKUPS = args.max_backups
  143. c.EXCLUDE = args.exclude_dirs
  144. c.COMPRESSOR = args.compressor
  145. c.NOGPG = args.nogpg
  146. c.NOROTATE = args.norotate
  147. c.NORSYNC = args.norsync
  148. def printerror(msg):
  149. print('Error: {}'.format(msg), file=sys.stderr)
  150. parser.print_help()
  151. exit(1)
  152. if c.S3_UPLOAD_ENABLED and not (c.S3_BUCKET or c.S3_ACCESS_KEY or c.S3_SECRET_KEY):
  153. printerror('enabled S3 uploads require the bucket, access_key and secret_key options',)
  154. if c.S3_GPG_ENCRYPT and not c.S3_GPG_PASSPHRASE:
  155. printerror('gpg encrypting needs a gpg passphrase')
  156. if c.EMAIL_REPORT and not (c.EMAIL_PROGRAM or c.EMAIL_FROM or c.EMAIL_DEST):
  157. printerror('enabled email reports require the program, from and destination')
  158. if not c.ORIGIN:
  159. printerror('you need to configure an origin directory')
  160. if not c.BACKUPS_DIR:
  161. printerror('you need to configure a backups destination direcory')
  162. @timeit(text='RSync to the most recent directory')
  163. def rsync_first(zerodir):
  164. # Now do the real backup with rsync
  165. excludeparams = ['--exclude={}/*'.format(i) for i in c.EXCLUDE]
  166. rsynccmd = ['/usr/bin/rsync', '-azAXSH', '--delete', *excludeparams, c.ORIGIN, zerodir]
  167. message('Running rsync with:\n{}'.format(' '.join(rsynccmd)))
  168. subprocess.check_call(rsynccmd)
  169. message('Rsync completed successfully')
  170. @timeit(text='Backup compression for upload')
  171. def compress_backup(dirpath: str) -> str:
  172. outpath = dirpath + '.tar.gz'
  173. message('Compressing directory {} to {}'.format(dirpath, outpath))
  174. # Remove the leading '/'; we'll instruct tar to change to the root directory
  175. # with the -C / option thus avoiding the "removing leading /" message
  176. if dirpath.startswith('/'):
  177. dirpath = dirpath[1:]
  178. compressor = c.COMPRESSOR
  179. cmd = "tar c --warning='no-file-ignored' --directory=/ {dirpath}|{compressor} > {outpath}"\
  180. .format(**locals())
  181. print(cmd)
  182. if not c.DRY_RUN:
  183. output = subprocess.check_output(cmd, shell=True).decode()
  184. message(output)
  185. return outpath
  186. @timeit(text='GPG encrypting the backup for upload')
  187. def gpg_encrypt_file(filepath: str) -> None:
  188. gpgpath = filepath + '.gpg'
  189. if os.path.exists(gpgpath):
  190. message('Warning: deleting previously existing GPG file: {}'.format(gpgpath))
  191. os.unlink(gpgpath)
  192. cmd = 'gpg --batch --symmetric --cipher-algo AES256 --passphrase-fd 0 {}'.format(filepath)
  193. message('Encrypting backup with command: {}'.format(cmd))
  194. if not c.DRY_RUN:
  195. p = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE,
  196. stdin=subprocess.PIPE, stderr=subprocess.PIPE)
  197. (stdout, stderr) = p.communicate(c.S3_GPG_PASSPHRASE.encode())
  198. if p.returncode != 0:
  199. raise Exception('Could not encrypt file: {}'.format(stdout + stderr))
  200. message('File encrypted successfully')
  201. return gpgpath
  202. @timeit(text='Uploading to S3')
  203. def upload_s3(dirpath: str) -> None:
  204. import boto3, time
  205. real_filepath = ''
  206. compressed_filepath = compress_backup(dirpath)
  207. if c.S3_GPG_ENCRYPT and not c.NOGPG:
  208. real_filepath = gpg_encrypt_file(compressed_filepath)
  209. real_filepath = compressed_filepath if not real_filepath else real_filepath
  210. datepart = time.strftime("%Y%m%d%H%M%S")
  211. remote_filename = os.path.split(real_filepath)[1] + '.' + datepart
  212. with open(real_filepath, 'rb') as data:
  213. message('Uploading file to Amazon S3')
  214. if not c.DRY_RUN:
  215. s3 = boto3.client('s3', aws_access_key_id=c.S3_ACCESS_KEY,
  216. aws_secret_access_key=c.S3_SECRET_KEY)
  217. s3.upload_fileobj(data, c.S3_BUCKET, remote_filename)
  218. if not c.DRY_RUN:
  219. if c.S3_GPG_ENCRYPT:
  220. # Remove the local encrupted file
  221. os.unlink(real_filepath)
  222. os.unlink(compressed_filepath)
  223. message('File uploaded to S3 bucket "{}" as key "{}"'.
  224. format(c.S3_BUCKET, remote_filename))
  225. def send_mail(subject: str, content: str) -> None:
  226. # The machine wont have a real smtp server, only a MDA, and the script wont have access to
  227. # external SMTP servers, so a sendmail-style binary will be used for delivery
  228. from textwrap import dedent
  229. from_ = c.EMAIL_FROM
  230. to = c.EMAIL_DEST
  231. real_content = dedent('''\
  232. From: {from_}
  233. To: {to}
  234. Subject: {subject}
  235. {content}
  236. '''.format(**locals()))
  237. sendmail = subprocess.Popen([c.EMAIL_PROGRAM, to], stdin=subprocess.PIPE,
  238. stdout=subprocess.PIPE, bufsize=1)
  239. stdout, stderr = sendmail.communicate(bytearray(real_content, 'utf-8'))
  240. @timeit(text='Rotating backups')
  241. def rotate_backups(backup_dirs: List[str]) -> None:
  242. backup_nums = sorted([int(i.split('.')[1]) for i in backup_dirs])
  243. backup_nums.reverse()
  244. for i in backup_nums:
  245. full_dirname = os.path.join(c.BACKUPS_DIR, '{}.{}'.format(c.BACKUP_BASENAME, i))
  246. if i >= c.MAX_BACKUPS:
  247. # Delete the oldest ones
  248. message('Deleting {}'.format(full_dirname))
  249. if not c.DRY_RUN:
  250. shutil.rmtree(full_dirname)
  251. else:
  252. # Rename to the greater number except for the 0 which will be
  253. # copied with hard links
  254. inc_dirname = os.path.join(c.BACKUPS_DIR, '{}.{}'.format(c.BACKUP_BASENAME, i+1))
  255. dir_params = (full_dirname, inc_dirname) # DRY
  256. if i == 0:
  257. message('Hardlink-copying "{}" => "{}"'.format(*dir_params))
  258. if not c.DRY_RUN:
  259. ret = os.system('cp -al {} {}'.format(*dir_params))
  260. if ret != 0:
  261. raise Exception('cp -al returned error {}!'.format(ret))
  262. else:
  263. message('Moving "{}" => "{}"'.format(*dir_params))
  264. if not c.DRY_RUN:
  265. shutil.move(*dir_params)
  266. def main() -> None:
  267. try:
  268. parse_arguments()
  269. if not os.path.exists(c.BACKUPS_DIR):
  270. raise Exception('Missing backups dir: {}'.format(c.BACKUPS_DIR))
  271. backup_dirs = [i for i in os.listdir(c.BACKUPS_DIR)
  272. if i.startswith(c.BACKUP_BASENAME) and
  273. os.path.isdir(os.path.join(c.BACKUPS_DIR, i))]
  274. if backup_dirs and not c.NOROTATE:
  275. rotate_backups(backup_dirs)
  276. zerodir = os.path.join(c.BACKUPS_DIR, '{}.0'.format(c.BACKUP_BASENAME))
  277. if not c.DRY_RUN and not c.NORSYNC:
  278. rsync_first(zerodir)
  279. if c.S3_UPLOAD_ENABLED:
  280. upload_s3(zerodir)
  281. except Exception as e:
  282. backup_completed = False
  283. message(format_exc())
  284. if hasattr(e, 'output'):
  285. message(e.output) # type: ignore
  286. else:
  287. backup_completed = True
  288. if backup_completed :
  289. email_subject = '[BACKUP SUCESS] Backup completed'
  290. else:
  291. email_subject = '[BACKUP FAILED] Backup problems!'
  292. if c.EMAIL_REPORT:
  293. send_mail(email_subject, '\n'.join(EMAIL_TEXTS))
  294. return 0 if backup_completed else 1
  295. if __name__ == '__main__':
  296. sys.exit(main())