archiver_4_3.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  def archive_service(self, service):
      """Archive one service's dated remote log files, then prune local ones.

      Remote files matching ``service.pattern`` (with ``<DATE->`` expanded to
      the glob ``????-??-??``) are listed on ``service.host``, narrowed down
      with ``filter_by_age`` using ``service.days_to_keep_on_remote``, and
      downloaded into ``<self.base_dir>/<service.name>/<service.host>``.
      Afterwards, if ``service.retention_period_days`` is set, local files
      selected by ``filter_by_age`` for that period are deleted.

      Services whose pattern has no ``<DATE->`` placeholder are skipped with
      a warning: no connection is made and no retention is applied.

      When ``self.dry_run`` is set, transfers and retention deletions are
      only printed. Note that the base directory is still created and stale
      ``.download`` files are still removed in that mode.

      Args:
          service: Object providing ``name``, ``host``, ``account``,
              ``directory``, ``pattern``, ``days_to_keep_on_remote`` and
              ``retention_period_days``.

      Returns:
          None.
      """
      # Create the base directory for this service, i.e. where we put logs.
      base_dir = os.path.join(self.base_dir, service.name, service.host)
      if not os.path.exists(base_dir):
          os.makedirs(base_dir)

      if "<DATE->" not in service.pattern:
          # We ignore services that don't have a <DATE-> in their pattern.
          # Returning here makes "Ignoring" true; without it execution fell
          # through and still connected to the remote host.
          print("Warning:", service.name, "does not include date. Ignoring.")
          return

      # Connect to remote
      client = SSHClient()
      # TODO: Use something other than auto add policy?
      client.set_missing_host_key_policy(AutoAddPolicy())
      try:
          client.connect(
              service.host,
              username=service.account,
              compress=True,
              allow_agent=self.use_ssh_agent,
          )
          files = self._list_archivable_files(client, service)

          # For each file download to a pending file name (optionally
          # gzipping) and only after it has successfully been downloaded do
          # we optionally delete from the remote.
          sftp = client.open_sftp()
          try:
              for file_name in files:
                  self._archive_file(sftp, service, base_dir, file_name)
          finally:
              # Release the SFTP channel even when a transfer fails.
              sftp.close()
      finally:
          # Always drop the SSH connection, including on connect/list errors.
          client.close()

      # We now go and delete any files that are older than the retention
      # period, if specified
      self._apply_retention(service, base_dir)

  def _list_archivable_files(self, client, service):
      """Return the remote files that ``filter_by_age`` selects for archiving.

      Runs ``FIND_COMMAND_TEMPLATE`` on the connected ``client`` with the
      service's directory and date glob, sorts the stripped output lines and
      passes them through ``filter_by_age``.
      """
      glob = service.pattern.replace("<DATE->", "????-??-??")
      cmd = FIND_COMMAND_TEMPLATE % {
          "dir": service.directory,
          "glob": glob,
      }
      _, stdout, _ = client.exec_command(cmd)
      remote_files = sorted(line.strip() for line in stdout.readlines())

      # NOTE(review): the predicate's argument is presumably a timedelta-like
      # age derived from each file name by filter_by_age -- confirm there.
      return filter_by_age(
          remote_files,
          lambda d: d.days > service.days_to_keep_on_remote
      )

  def _archive_file(self, sftp, service, base_dir, file_name):
      """Download one remote file into ``base_dir`` over ``sftp``.

      Files not already ending in ``.gz`` are gzip-compressed while being
      downloaded. The data goes to ``<local_name>.download`` first and is
      renamed to its final name only once the transfer has completed; the
      remote copy is then removed if ``self.remove`` is set. Nothing is
      transferred when the final local file already exists or when
      ``self.dry_run`` is set.
      """
      needs_gzip = not file_name.endswith(".gz")
      local_name = os.path.join(base_dir, os.path.basename(file_name))
      if needs_gzip:
          local_name += ".gz"
      pending_name = local_name + ".download"

      # Presumably a leftover from an interrupted earlier run; start afresh.
      if os.path.exists(pending_name):
          os.remove(pending_name)

      if os.path.exists(local_name):
          print("Warning: ", local_name, "already exists")
          return

      if self.verbose or self.dry_run:
          print("Archiving: %s:%s to %s" % (
              service.host, file_name, local_name,
          ))

      if self.dry_run:
          return

      # Set up progress bar for downloads
      pb = None
      if self.verbose:
          widgets = [
              os.path.basename(file_name), " ",
              progressbar.Percentage(),
              ' ', progressbar.Bar(),
              ' ', progressbar.ETA(),
              ' ', progressbar.FileTransferSpeed(),
          ]
          pb = progressbar.ProgressBar(widgets=widgets)

          def progress_cb(bytes_downloaded, total_size):
              pb.max_value = total_size
              pb.update(bytes_downloaded)
      else:
          def progress_cb(bytes_downloaded, total_size):
              pass

      # If filename does not end with '.gz' then we compress while we
      # download
      # TODO: Should we be preserving last modified times?
      if needs_gzip:
          with gzip.open(pending_name, 'wb', compresslevel=9) as f:
              sftp.getfo(file_name, f, callback=progress_cb)
      else:
          sftp.get(file_name, pending_name, callback=progress_cb)

      if pb is not None:
          pb.finish()

      # Only a completed download ever appears under its final name.
      os.rename(pending_name, local_name)

      if self.remove:
          if self.verbose:
              print("Removing remote")
          sftp.remove(file_name)

  def _apply_retention(self, service, base_dir):
      """Delete local files under ``base_dir`` past the retention period.

      Does nothing when ``service.retention_period_days`` is falsy. Every
      file found by walking ``base_dir`` is offered to ``filter_by_age``;
      the ones it returns are deleted (or only printed in dry-run mode).
      """
      if not service.retention_period_days:
          return

      local_files = [
          os.path.join(dirpath, filename)
          for dirpath, _, filenames in os.walk(base_dir)
          for filename in filenames
      ]

      files_to_delete = filter_by_age(
          local_files,
          lambda d: d.days > service.retention_period_days
      )

      for file_name in files_to_delete:
          if self.verbose or self.dry_run:
              print("Deleting file due to retention policy: %s" % (
                  file_name,
              ))

          if not self.dry_run:
              os.remove(file_name)