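# Sketch of the imports this method relies on. It is assumed that
# FIND_COMMAND_TEMPLATE and filter_by_age are helpers defined elsewhere
# in the surrounding module, and that the progressbar2 package provides
# the progressbar module used below.
import gzip
import os

import progressbar  # the progressbar2 package
from paramiko import AutoAddPolicy, SSHClient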
def archive_service(self, service):
    """Do the archiving step for the given Service."""
    # Create the base directory for this service, i.e. where we put logs.
    base_dir = os.path.join(self.base_dir, service.name, service.host)
    if not os.path.exists(base_dir):
        os.makedirs(base_dir)
    if "<DATE->" not in service.pattern:
        # We skip services that don't have a <DATE-> in their pattern.
        print("Warning:", service.name, "does not include date. Ignoring.")
        return
    # Connect to the remote host.
    client = SSHClient()
    # TODO: Use something other than auto add policy?
    client.set_missing_host_key_policy(AutoAddPolicy())
    client.connect(
        service.host,
        username=service.account,
        compress=True,
        allow_agent=self.use_ssh_agent,
    )
    # Fetch the list of matching files from the remote.
    glob = service.pattern.replace("<DATE->", "????-??-??")
    cmd = FIND_COMMAND_TEMPLATE % {
        "dir": service.directory,
        "glob": glob,
    }
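    # (Assumption: FIND_COMMAND_TEMPLATE expands to something like
    # "find %(dir)s -name '%(glob)s'", printing one matching path per
    # line; the <DATE-> token becomes a glob matching ISO-format dates.)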
    _, stdout, _ = client.exec_command(cmd)
    files = stdout.readlines()
    files = [f.strip() for f in files]
    files.sort()
    # Filter the files down to the ones we want to archive.
    files = filter_by_age(
        files,
        lambda d: d.days > service.days_to_keep_on_remote,
    )
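    # (Assumption: filter_by_age parses the <DATE-> stamp out of each
    # file name and keeps the files whose age satisfies the predicate.)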
    # Download each file to a pending file name (optionally gzipping it),
    # and only after it has successfully been downloaded do we optionally
    # delete it from the remote.
    sftp = client.open_sftp()
    for file_name in files:
        local_name = os.path.join(base_dir, os.path.basename(file_name))
        if not file_name.endswith(".gz"):
            local_name += ".gz"
        pending_name = local_name + ".download"
        if os.path.exists(pending_name):
            os.remove(pending_name)
        if os.path.exists(local_name):
            print("Warning:", local_name, "already exists")
            continue
        # Set up a progress bar for the download.
        if self.verbose:
            widgets = [
                os.path.basename(file_name), " ",
                progressbar.Percentage(),
                " ", progressbar.Bar(),
                " ", progressbar.ETA(),
                " ", progressbar.FileTransferSpeed(),
            ]
            pb = progressbar.ProgressBar(widgets=widgets)

            def progress_cb(bytes_downloaded, total_size):
                pb.max_value = total_size
                pb.update(bytes_downloaded)
        else:
            def progress_cb(bytes_downloaded, total_size):
                pass
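        # (The do-nothing callback keeps the transfer calls below uniform
        # whether or not progress is being displayed.)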
        if self.verbose or self.dry_run:
            print("Archiving: %s:%s to %s" % (
                service.host, file_name, local_name,
            ))
        if not self.dry_run:
            # If the file name does not end with '.gz' then we compress
            # it while we download.
            # TODO: Should we be preserving last modified times?
            if not file_name.endswith(".gz"):
                with gzip.open(pending_name, "wb", compresslevel=9) as f:
                    sftp.getfo(file_name, f, callback=progress_cb)
            else:
                sftp.get(file_name, pending_name, callback=progress_cb)
            if self.verbose:
                pb.finish()
            os.rename(pending_name, local_name)
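            # Only delete the remote copy once the local archive is
            # safely in place under its final name.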
            if self.remove:
                if self.verbose:
                    print("Removing remote")
                sftp.remove(file_name)
    sftp.close()
    client.close()
    # Finally, delete any local files that are older than the retention
    # period, if one is specified.
    if service.retention_period_days:
        local_files = [
            os.path.join(dirpath, filename)
            for dirpath, _, filenames in os.walk(base_dir)
            for filename in filenames
        ]
        files_to_delete = filter_by_age(
            local_files,
            lambda d: d.days > service.retention_period_days,
        )
        for file_name in files_to_delete:
            if self.verbose or self.dry_run:
                print("Deleting file due to retention policy: %s" % file_name)
            if not self.dry_run:
                os.remove(file_name)
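
# A hypothetical invocation, assuming this method lives on an Archiver
# class whose constructor records the settings used above (base_dir,
# verbose, dry_run, remove, use_ssh_agent), and that Service is a simple
# record of name/host/account/pattern/directory plus the two age settings:
#
#     archiver = Archiver(
#         base_dir="/var/log-archive",
#         verbose=True,
#         dry_run=False,
#         remove=True,
#         use_ssh_agent=True,
#     )
#     archiver.archive_service(service)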