def archive_service(self, service):
    """Archive log files for *service* from its remote host to local disk.

    Connects over SSH, lists files matching ``service.pattern`` (with its
    date placeholder expanded to a ``YYYY-MM-DD`` glob) under
    ``service.directory``, downloads those older than
    ``service.days_to_keep_on_remote`` — gzipping any that are not already
    compressed — and, once a file is safely downloaded, optionally removes
    it from the remote.  Finally prunes local copies older than
    ``service.retention_period_days``.

    Honours ``self.dry_run`` (report only, no transfers/deletions),
    ``self.verbose`` (progress bars and chatter) and ``self.remove``
    (delete from the remote after a successful download).
    """
    # NOTE(review): this placeholder was an empty string in the original,
    # which made the membership test below always false and made replace()
    # insert the glob between every character of the pattern.  "<DATE>"
    # is assumed to be the intended token (the surrounding comments have a
    # matching hole, consistent with a stripped angle-bracket tag) --
    # confirm against the Service definitions / config format.
    date_placeholder = "<DATE>"

    # Create the base directory for this service, i.e. where we put logs.
    base_dir = os.path.join(self.base_dir, service.name, service.host)
    if not os.path.exists(base_dir):
        os.makedirs(base_dir)

    if date_placeholder not in service.pattern:
        # We ignore services that don't have a date placeholder in their
        # pattern: without one we cannot tell which remote files are dated
        # logs.  (The original fell through here despite the warning.)
        print("Warning:", service.name, "does not include date. Ignoring.")
        return

    # Connect to remote
    client = SSHClient()
    # TODO: Use something other than auto add policy?
    client.set_missing_host_key_policy(AutoAddPolicy())
    client.connect(
        service.host,
        username=service.account,
        compress=True,
        allow_agent=self.use_ssh_agent,
    )
    try:
        # Fetch list of files from the remote; the placeholder becomes a
        # glob matching ISO dates (YYYY-MM-DD).
        glob = service.pattern.replace(date_placeholder, "????-??-??")
        cmd = FIND_COMMAND_TEMPLATE % {
            "dir": service.directory,
            "glob": glob,
        }
        _, stdout, _ = client.exec_command(cmd)
        files = [f.strip() for f in stdout.readlines()]
        files.sort()

        # Filter the files to ones we want to archive
        files = filter_by_age(
            files, lambda d: d.days > service.days_to_keep_on_remote
        )

        # For each file download to a pending file name (optionally
        # gzipping) and only after it has successfully been downloaded do
        # we optionally delete from the remote.
        sftp = client.open_sftp()
        try:
            for file_name in files:
                local_name = os.path.join(
                    base_dir, os.path.basename(file_name)
                )
                if not file_name.endswith(".gz"):
                    local_name += ".gz"
                pending_name = local_name + ".download"
                # A leftover .download file is a previously interrupted
                # transfer -- restart it from scratch.
                if os.path.exists(pending_name):
                    os.remove(pending_name)
                if os.path.exists(local_name):
                    print("Warning: ", local_name, "already exists")
                    continue

                # Set up progress bar for downloads
                if self.verbose:
                    widgets = [
                        os.path.basename(file_name), " ",
                        progressbar.Percentage(), ' ',
                        progressbar.Bar(), ' ',
                        progressbar.ETA(), ' ',
                        progressbar.FileTransferSpeed(),
                    ]
                    pb = progressbar.ProgressBar(widgets=widgets)

                    # Bind pb as a default argument so the callback does
                    # not rely on late-binding closure semantics.
                    def progress_cb(bytes_downloaded, total_size, pb=pb):
                        pb.max_value = total_size
                        pb.update(bytes_downloaded)
                else:
                    def progress_cb(bytes_downloaded, total_size):
                        pass

                if self.verbose or self.dry_run:
                    print("Archiving: %s:%s to %s" % (
                        service.host,
                        file_name,
                        local_name,
                    ))

                if not self.dry_run:
                    # If filename does not end with '.gz' then we compress
                    # while we download
                    # TODO: Should we be preserving last modified times?
                    if not file_name.endswith(".gz"):
                        with gzip.open(
                            pending_name, 'wb', compresslevel=9
                        ) as f:
                            sftp.getfo(file_name, f, callback=progress_cb)
                    else:
                        sftp.get(
                            file_name, pending_name, callback=progress_cb
                        )
                    if self.verbose:
                        pb.finish()
                    # The rename marks the download as complete; only a
                    # fully transferred file ever gets the final name.
                    os.rename(pending_name, local_name)

                    if self.remove:
                        if self.verbose:
                            print("Removing remote")
                        sftp.remove(file_name)
        finally:
            sftp.close()
    finally:
        # Always release the SSH connection, even if a transfer failed
        # (the original leaked it on any exception).
        client.close()

    # We now go and delete any files that are older than the retention
    # period, if specified
    if service.retention_period_days:
        local_files = [
            os.path.join(dirpath, filename)
            for dirpath, _, filenames in os.walk(base_dir)
            for filename in filenames
        ]
        files_to_delete = filter_by_age(
            local_files, lambda d: d.days > service.retention_period_days
        )
        for file_name in files_to_delete:
            if self.verbose or self.dry_run:
                print("Deleting file due to retention policy: %s" % (
                    file_name,
                ))
            if not self.dry_run:
                os.remove(file_name)