#!/usr/bin/python
#-*- coding: utf-8 -*-
# Name: PostgreSQL backup script (designed for Odoo)
# Author: Ryan Cole (https://ryanc.me/)
#
# Description:
# Simple command-line script to handle creation/restoration of database backups,
# archiving to offsite locations, and auto-cleanup of old backups.
#
# Requires:
# - python 2.7.*
# - python-docopt
# - AWS CLI
#TODO: move to direct API calls for pg_dump, pg_restore, and AWS-related functions.
#TODO: `list`, `push`, `pull` and `cleanup` functions
#TODO: exception handling (datetime, subprocess, etc)
#TODO: `--mask` switch for setting local file permissions
#TODO: `--dry` switch for testing
#TODO: fileserver/SCP integration
#TODO: support for gpg encryption
#TODO: string-matching for command options (e.g. `command --switch --input=<filename>` would auto-fill the filename)
# Default configuration table.
# Keys that look like command-line switches ("--dir", "-u", ...) are the
# fallback values consulted by arg() when the switch is not supplied on the
# command line; the remaining keys configure the external shell commands
# used to dump/restore/compress/encrypt backups.
conf = {
    # default local working directory for database dumps
    "--dir": "/opt/odoo-backups/",
    # default postgresql connection info
    # for password-authentication
    "-u": "username",
    "-w": "password",
    "-h": "host",
    "-p": "port",
    # enable for peer-authentication - this requires that the script is run as the Odoo user
    "--peer": False,
    # default remote fileserver ssh info
    # note: currently not implemented
    "--fsuser": "username",
    "--fskey": "keyfile",
    "--fshost": "",
    "--fsport": "22",
    # default aws s3 bucket
    "--bucket": "my_bucket_name",
    # override this to use a different profile for aws cli auth
    # run `aws configure --profile=<name>`
    "--profile": "<name>",
    # default recipient name for gpg encryption
    # this 'name' corresponds to a public key
    # in the current users' gpg keychain
    "--gpgname": "admin@mydomain.com",
    # format for timestamps in backup filenames (e.g. <database-name>__<--savefmt>.pgdump.gz.gpg)
    # warning: the script uses double-underscores (e.g. `__`) to separate the database-name and
    # timestamp in the filename. using __ in the savefmt will probably break things.
    "--savefmt": "%Y-%m-%d_%H-%M-%S",
    # format for the `--date=xx` arg
    "--datefmt": "%d/%m/%Y",
    # "--datefmt": "%m/%d/%Y",
    # logfile location (must be writeable)
    "--logfile": "/var/log/zorb-backup.log",
    # archive commands
    # options are appended
    "gzipCommand": "gzip",
    "gzipOptions": "-9 --force",
    "gunzipCommand": "gunzip",
    "gunzipOptions": "--force",
    # database dump/restore/create commands
    # defaults here are tailored for Odoo,
    # but should work fine for general backups
    "dumpCommand": "pg_dump",
    "dumpOptions": "-E UTF-8 -F p -b",
    "restoreCommand": "psql",
    "restoreOptions": "",
    "createCommand": "createdb",
    "createOptions": "",
    # encryption/decryption commands and options
    "encryptCommand": "gpg",
    "encryptOptions": "--no-use-agent --quiet --no-tty --batch --yes --cipher-algo AES256",
    "decryptCommand": "gpg",
    "decryptOptions": "--no-use-agent --quiet --yes",
    # cleanup old backups every time the script runs?
    "alwaysCleanup": False,
}
  80. doc = """PostgreSQL Backups Script
  81. Usage:
  82. pgback.py create (s3 | fileserver | local | all) <source-db> [options]
  83. pgback.py restore (local | s3 | fileserver) <source-db-name> <dest-db> [--name=<filename> | --date=<date>] [options]
  84. pgback.py cleanup (s3 | fileserver | local | all) <maxage-days> [db-name] [--archive=(s3 | fileserver)] [options]
  85. pgback.py push (s3 | fileserver | all) <dbname> [--name=<filename> | --date=<date>] [options]
  86. pgback.py pull (s3 | fileserver) <dbname> [--name=<filename> | --date=<date>] [options]
  87. pgback.py list (s3 | fileserver | local | all) [dbname]
  88. pgback.py (-h | --help | --version)
  89. Note:
  90. Options are read from the config table by default, but can be overriden with command-line arguments/switches.
  91. See the `config = {}` section of this file for more details.
  92. Examples:
  93. Create a backup of `livedb` using peer authentication, and push it to s3://my.backups.bucket
  94. > pgback.py create s3 livedb -u pgusername --peer --bucket my.backups.bucket
  95. Create a backup of `livedb` using password authentication, and push it to a remote fileserver using scp
  96. > pgback.py create fileserver livedb -u odoo -w password --fsinfo username:password@backups.example.com:/home/backups/livedb --fsport 22
  97. Restore a backup of `backupdb` to a new database called `newdb`. Search s3://my.backups.bucket for the latest backup from 24/08/2016
  98. > pgback.py restore s3 backupdb newdb --date 24/08/2016
  99. Restore a backup of `backupdb`. --date and --name are omitted, so the script will find the most recent backup matching `backupdb`
  100. > pgback.py restore local backupdb newdb --dir /home/backups/livedb/
  101. TODO: more examples
  102. Options:
  103. --date=<date> search for files whose date matches dd/mm/yyyy in the servers' time
  104. pulls most recent backup from selected day if multiple matches are found
  105. --name=<name> search for literal filename match
  106. -u <username> database username
  107. -w <password> database password (not available when using --peer)
  108. -h <host> database host (not available when using --peer)
  109. -p <port> database port
  110. --peer use peer authentication for database
  111. --bucket <bucket> bucket to use for s3 uploads/downloads
  112. --profile <profile> profile to use for aws cli auth (see `aws configure help`)
  113. --gpgname <name> name to use as recipient for gpg encryption
  114. --gpgpass <pass> password for symmetric encryption with gpg (mutually exlusive with --gpgname)
  115. --fsinfo standard ssh connection string (e.g. user:password@host:/folder)
  116. --fskey ssh keyfile for fileserver
  117. --fsuser user for fileserver
  118. --fshost host for fileserver
  119. --fsport port for fileserver
  120. --fspath path to backups files on fileserver
  121. --dir <dir> working directory for backup files (default is ./)
  122. --savefmt <format> datetime format to use for backup filenames
  123. --datefmt <format> datetime format to use for the '--date=' arg
  124. --su <user> database dump/restore script as <user> (script must be run as root)
  125. --logfile <logfile> must be writeable by whoever is running the script
  126. -a, --all list ALL backup files when using the `list` command
  127. -v, --verbose enable extra-detailed output
  128. -s, --silent disable all output (does NOT imply -x)
  129. -x, --noconfirm disable yes/no confirmations for irreversible actions
  130. such as database restores, or file deletions
  131. -z, --nozip disable gzipping
  132. """
from docopt import docopt
from datetime import datetime
from os import path, devnull
import sys
import subprocess
# process args with docopt (parser is generated from the `doc` string above)
args = docopt(doc, version="1.0.0")
# black hole for pesky information; NOTE: this rebinds `devnull` from the
# os.devnull path string to an open writeable file object used by cmd()
devnull = open(devnull, "w")
  142. # print to logfile
  143. def log(t, message):
  144. dateString = datetime.now().strftime("%Y/%m/%d %I:%M:%S%p")
  145. string = dateString + " " + t + ": " + message
  146. with open(conf["--logfile"], "a") as f:
  147. f.write(string + "\n")
  148. # print to stdout
  149. def say(message, sameline=False, silent=True):
  150. # no output with the -s or --silent switch
  151. if (arg("-s") or arg("--silent")) and silent:
  152. return
  153. if sameline:
  154. print( message + " ",)
  155. sys.stdout.flush()
  156. else:
  157. print (message)
  158. # helper function for running shell commands
  159. def cmd(message, detail, cmd, stdout=None):
  160. if arg("-s") or arg("--silent"):
  161. stdout=devnull
  162. say(message, True)
  163. status = subprocess.call(cmd, shell=True, stdout=stdout)
  164. if status == 0:
  165. log("SUCCESS", message + detail)
  166. say("done")
  167. else:
  168. log("ERROR", message + detail)
  169. say("error!")
  170. sys.exit(1)
  171. # check for argument presence or value
  172. def arg(name, default=None):
  173. if name in args and args[name] != None and args[name] != False:
  174. return args[name]
  175. if default != None:
  176. return default
  177. if name in conf and conf[name] != None:
  178. return conf[name]
  179. return False
  180. # yes/no prompt, return true/false respectively
  181. def promptYesNo(message, default=False):
  182. if arg("-x") or arg("--noconfirm"):
  183. return True
  184. if default:
  185. prompt = " [Y/n]:"
  186. elif not default:
  187. prompt = " [y/N]:"
  188. else:
  189. prompt = " [y/n]:"
  190. say(message + prompt, sameline=True, silent=False)
  191. choice = raw_input().lower()
  192. if not default:
  193. if choice == "y":
  194. return True
  195. else:
  196. return False
  197. elif default:
  198. if choice == "n":
  199. return False
  200. else:
  201. return True
  202. else:
  203. if choice == "y":
  204. return True
  205. elif choice == "n":
  206. return False
  207. else:
  208. return None
  209. # clear buffer
  210. # parse backup filename to dbname, date
  211. def parseFilename(filename):
  212. # first, ensure @filename is _just_ the file's name
  213. filename = path.basename(filename)
  214. # strip .gpg, .gz, and .pgdump
  215. if filename[-4:] == ".gpg":
  216. filename = filename[:-4]
  217. if filename[-3:] == ".gz":
  218. filename = filename[:-3]
  219. if filename[-7:] == ".pgdump":
  220. filename = filename[:-7]
  221. sep = filename.find("__")
  222. dbname = filename[0:sep]
  223. dbdate = datetime.strptime(filename[sep+2:], arg("--savefmt"))
  224. return dbname, dbdate
  225. # find most recent backup in list of backups
  226. def findNewest(backups):
  227. if len(backups) == 1:
  228. return backups[0]
  229. match = backups[0]
  230. for backup in backups:
  231. if backup[1] > match[1]:
  232. match = backup
  233. return match
  234. # zip file and return new filename
  235. def gzipFile(absFilename):
  236. if arg("-z") or arg("--nozip"):
  237. return absFilename
  238. opts = arg("gzipOptions")
  239. if arg("-x") or arg("--noconfirm"):
  240. opts = opts + " --force --quiet"
  241. cmd("Gzipping... ", "", arg("gzipCommand") + " " + opts + " " + absFilename)
  242. return absFilename + ".gz"
  243. # unzip and return new filename
  244. def gunzipFile(absFilename):
  245. if not absFilename[-3:] == ".gz":
  246. return absFilename
  247. opts = arg("gunzipOptions")
  248. if arg("-x") or arg("--noconfirm"):
  249. opts = opts + " --force --quiet"
  250. cmd("Unzipping... ", "", arg("gunzipCommand") + " " + opts + " " + absFilename)
  251. return absFilename[:-3]
  252. # encrypt file with gpg
  253. def encryptFile(absFilename, recipient=None, password=None):
  254. if not password and not recipient:
  255. return absFilename
  256. # keep user in-the-loop
  257. if password and recipient:
  258. say("Both --gpgpass and --gpgname were supplied, but they can not be used in combination.")
  259. say("Falling back to --gpgname...")
  260. log("INFO", "User supplied --gpgpass and --gpgname, falling back to --gpgname")
  261. if recipient:
  262. command = arg("encryptCommand") + " " + arg("encryptOptions") + " -o " + absFilename + ".gpg -r " + recipient + " -e " + absFilename
  263. else:
  264. command = arg("encryptCommand") + " " + arg("encryptOptions") + " -o " + absFilename + ".gpg --passphrase " + password + " -c " + absFilename
  265. cmd("Encrypting... ", "", command)
  266. # cleanup the non-encrypted base file
  267. cmd("Cleaning up... ", "", "rm -f " + absFilename)
  268. return absFilename + ".gpg"
  269. # decrypt file with gpg
  270. def decryptFile(filename):
  271. command = arg("decryptCommand") + " " + arg("decryptOptions") + " -o " + absFilename[:-4] + " -d " + absFilename
  272. cmd("Decrypting... ", "", command)
  273. cmd("Cleaning up... ", "", "rm -f " + filename)
  274. return filename[:-4]
  275. # dump database using password auth
  276. def dumpDatabasePassword(dbname, filename, username, password, host, port):
  277. # dump database
  278. command = arg("dumpCommand") + " --dbname=postgresql://" + username + ":" + password + "@" + host + ":" + port + "/" + dbname + " " + arg("dumpOptions") + " -f " + filename
  279. logstr = username + ":[password]@" + host + ":" + port + "/" + dbname + " -> " + filename
  280. cmd("Dumping database... ", logstr, command)
  281. # gzipped for extra $$
  282. return filename
  283. # dump database using peer auth
  284. def dumpDatabasePeer(dbname, filename, username, port):
  285. # dump database
  286. command = arg("dumpCommand") + " -d " + dbname + " -U " + username + " -p " + port + " " + arg("dumpOptions") + " -f " + filename
  287. logstr = username + ":[peer]@localhost:" + port + "/" + dbname + " -> " + filename
  288. cmd("Dumping database... ", logstr, command)
  289. # gzipped for extra $$
  290. return filename
  291. # restore database using password auth
  292. def restoreDatabasePassword(dbname, filename, username, password, host, port):
  293. # check that the user really does want to do the thing...
  294. if not promptYesNo("Restore to `" + dbname + "` from `" + filename + "`?"):
  295. return
  296. # createdb doesn't accept postgresql:// URI's, so we need to export an env variable
  297. cmd("Setting PGPASSWORD... ", "", "export PGPASSWORD=" + password)
  298. cmd("Creating database... ", dbname, "createdb -h " + arg("-h") + " -p " + arg("-p") + " -U " + arg("-u") + " " + dbname)
  299. cmd("Clearing PGPASSWORD... ", "", "unset PGPASSWORD")
  300. # restore
  301. command = arg("restoreCommand") + " --dbname=postgresql://" + username + ":" + password + "@" + host + ":" + port + "/" + dbname + " " + arg("restoreOptions") + " < " + filename
  302. cmd("Restoring database... ", filename + " -> " + dbname, command)
  303. # restore database using peer auth
  304. def restoreDatabasePeer(dbname, filename, username, port):
  305. # see restoreDatabasePassword() for details
  306. if not promptYesNo("Restore to `" + dbname + "` from `" + filename + "`?"):
  307. return
  308. cmd("Creating database... ", dbname, arg("createCommand") + " -U " + username + " -p " + port + " " + dbname + " " + arg("createOptions"))
  309. command = arg("restoreCommand") + " -U " + username + " -p " + port + " -d " + dbname + " " + arg("restoreOptions") + " < " + filename
  310. cmd("Restoring database... ", filename + " -> " + dbname, command)
  311. # upload to s3 from file
  312. def uploadToS3(absFilename, bucket, profile):
  313. # get lonely filename
  314. _, filename = path.split(absFilename)
  315. command = "aws s3 cp " + absFilename + " s3://" + bucket + "/" + filename + " --only-show-errors --profile=" + profile
  316. cmd("Uploading to S3... ", absFilename + " -> s3://" + bucket + "/", command)
  317. # download from s3 to file
  318. def downloadFromS3(bucket, folder, filename, profile):
  319. absFilename = path.abspath(folder) + "/"+ filename
  320. command = "aws s3 cp s3://" + bucket + "/" + filename + " " + absFilename + " --only-show-errors --profile=" + profile
  321. cmd("Downloading from S3... ", bucket + " -> " + absFilename, command)
  322. return absFilename
  323. # search on s3 for either date or literal string match
  324. def searchOnS3(bucket, profile, sourceDbName, date=False, name=False):
  325. say("Searching S3... ", True)
  326. # list the bucket contents
  327. command = "aws s3 ls s3://" + bucket + "/ --profile=" + profile
  328. res = subprocess.check_output(command, shell=True)
  329. log("Success", "Searching S3 for backup files")
  330. say("done")
  331. # this function should eventually match a single backup file
  332. match = None
  333. # parse the output of `aws s3 ls`
  334. backups = []
  335. for line in res.splitlines():
  336. # `aws s3 ls` returns data like:
  337. # <date> <time> <size> <filename>
  338. filename = line.split(None, 3)[3]
  339. dbname, dbdate = parseFilename(filename)
  340. if dbname == sourceDbName:
  341. backups.append([dbname, dbdate, filename])
  342. # no matches :(
  343. if len(backups) < 1:
  344. log("ERROR", "Restore failed - no matching backups found")
  345. say("Searching S3 Failed! Couldn't find any matching backups")
  346. exit(1)
  347. # `--date=xx` was used
  348. if date:
  349. targetDate = datetime.strptime(date, arg("--datefmt")).date()
  350. matches = []
  351. for backup in backups:
  352. if targetDate == backup[1].date():
  353. matches.append(backup)
  354. if len(matches) < 1:
  355. log("ERROR", "Searching S3 - Could not find a match for the date " + date)
  356. say("Searching S3 Failed! Couldn't find any files with date matching `" + date + "`")
  357. exit(1)
  358. match = findNewest(matches)
  359. # `--name=xx` was used
  360. elif name:
  361. # search by filename
  362. for backup in backups:
  363. if backup[0] == name:
  364. match = name
  365. break
  366. # user didn't specify date OR filename, search for absolute newest backup
  367. else:
  368. match = findNewest(backups)
  369. # no matches! </3
  370. if not match:
  371. log("ERROR", "Restore failed - No valid backups after dbname/date checking")
  372. say("Searching S3 Failed! Couldn't find any backups that matched your <source-db-name> or --date")
  373. exit(1)
  374. # yay
  375. log("Success", "Search S3 - Found matching file `" + match[2] + "`")
  376. return match[2]
  377. # upload to fileserver from file
  378. def uploadToServer():
  379. print("Fileserver upload/download is not implemented yet.")
  380. # download from fileserver to file
  381. def downloadFromServer():
  382. print("Fileserver upload/download is not implemented yet.")
  383. # search on fileserver for either date or literal string match
  384. def searchOnServer():
  385. print("Fileserver upload/download is not implemented yet.")
  386. # search local folder
  387. def searchLocal(directory, dbname, date=None, name=None):
  388. print("Local searching is not currently implemented, please use --name instead.")
  389. # create a backup
  390. if arg("create"):
  391. log("", "Starting a new backup-create job")
  392. # parse arguments
  393. dbUser = arg("-u")
  394. dbPass = arg("-w")
  395. dbHost = arg("-h")
  396. dbPort = arg("-p")
  397. dbName = args["<source-db>"]
  398. workdir = path.abspath(arg("--dir")).rstrip("/")
  399. if not path.isdir(workdir):
  400. say("Error! The path could not be found: `" + workdir + "`")
  401. log("ERROR", "The path could not be found `" + workdir + "`")
  402. exit(1)
  403. # generate filename with timestamp
  404. dateString = datetime.now().strftime(arg("--savefmt"))
  405. filename = dbName + "__" + dateString + ".pgdump"
  406. absFilename = path.abspath(workdir + "/" + filename)
  407. # dump to .pgdump file
  408. if arg("--peer"):
  409. absFilename = dumpDatabasePeer(dbName, absFilename, dbUser, dbPort)
  410. else:
  411. absFilename = dumpDatabasePassword(dbName, absFilename, dbUser, dbPass, dbHost, dbPort)
  412. # gzip the file
  413. absFilename = gzipFile(absFilename)
  414. # encrypt the file
  415. absFilename = encryptFile(absFilename, arg("--gpgname"), arg("--gpgpass"))
  416. # upload to S3
  417. if arg("all") or arg("s3"):
  418. uploadToS3(absFilename, arg("--bucket"), arg("--profile"))
  419. # upload to fileserver
  420. if arg("all") or arg("fileserver"):
  421. print("TODO: ...me!")
  422. # restore a backup
  423. elif arg("restore"):
  424. log("", "Starting a new backup-restore job")
  425. # parse arguments
  426. dbUser = arg("-u")
  427. dbPass = arg("-w")
  428. dbHost = arg("-h")
  429. dbPort = arg("-p")
  430. dbName = args["<dest-db>"]
  431. dbSourceName = args["<source-db-name>"]
  432. workdir = path.abspath(arg("--dir")).rstrip("/")
  433. if not path.isdir(workdir):
  434. say("Error! The path could not be found: `" + workdir + "`")
  435. log("FATAL ERROR", "The path could not be found `" + workdir + "`")
  436. exit(1)
  437. absFilename = False
  438. # restore from S3 bucket
  439. if arg("s3"):
  440. # check which match-type the user selected
  441. filename = searchOnS3(arg("--bucket"), arg("--profile"), dbSourceName, date=arg("--date"), name=arg("--name"))
  442. # found a backup, prompt for confirmation
  443. say("Found matching backup: `" + filename + "`", silent=False)
  444. if not promptYesNo("Would you like to restore it?"):
  445. log("Success", "Search S3 - User chose not to restore, exiting...")
  446. exit(0)
  447. # download
  448. absFilename = downloadFromS3(arg("--bucket"), workdir, filename, arg("--profile"))
  449. # decrypt
  450. absFilename = decryptFile(absFilename)
  451. # unzip
  452. absFilename = gunzipFile(absFilename)
  453. elif arg("fileserver"):
  454. print("Fileserver uploading has no been implemented yet.")
  455. if arg("local"):
  456. absFilename = searchLocal(workdir, dbSourceName, date=arg("--date"), name=arg("--name"))
  457. # found a backup, prompt for confirmation
  458. say("Found matching backup: `" + filename + "`", silent=False)
  459. if not promptYesNo("Would you like to restore it?"):
  460. log("Success", "Search S3 - User chose not to restore, exiting...")
  461. exit(0)
  462. # check that we actually downloaded a file
  463. if not absFilename:
  464. log("ERROR", "Database download failed for `" + dbname + "`")
  465. say("Error downloading database backup!")
  466. exit(1)
  467. # restore!
  468. if arg("--peer"):
  469. restoreDatabasePeer(dbName, absFilename, dbUser, dbPort)
  470. else:
  471. restoreDatabasePassword(dbName, absFilename, dbUser, dbPass, dbHost, dbPort)
  472. elif arg("push") or arg("pull") or arg("list"):
  473. print("The `push`, `pull`, and `list` commands have not been implemented yet.")
  474. # clean old backups ()
  475. if arg("cleanup") or arg("alwaysCleanup"):
  476. print("The cleanup feature has no been implemented yet.")