tasks_1.py 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445
  1. def archive_resource(context, resource, log, result=None, url_timeout=30):
  2. """
  3. Archive the given resource. Moves the file from the temporary location
  4. given in download().
  5. Params:
  6. result - result of the download(), containing keys: length, saved_file
  7. If there is a failure, raises ArchiveError.
  8. Returns: {cache_filepath, cache_url}
  9. """
  10. from ckanext.archiver import default_settings as settings
  11. relative_archive_path = os.path.join(resource['id'][:2], resource['id'])
  12. archive_dir = os.path.join(settings.ARCHIVE_DIR, relative_archive_path)
  13. if not os.path.exists(archive_dir):
  14. os.makedirs(archive_dir)
  15. # try to get a file name from the url
  16. parsed_url = urlparse(resource.get('url'))
  17. try:
  18. file_name = parsed_url.path.split('/')[-1] or 'resource'
  19. file_name = file_name.strip() # trailing spaces cause problems
  20. except Exception:
  21. file_name = "resource"
  22. # move the temp file to the resource's archival directory
  23. saved_file = os.path.join(archive_dir, file_name)
  24. shutil.move(result['saved_file'], saved_file)
  25. log.info('Going to do chmod: %s', saved_file)
  26. try:
  27. os.chmod(saved_file, 0o644) # allow other users to read it
  28. except Exception as e:
  29. log.error('chmod failed %s: %s', saved_file, e)
  30. raise
  31. log.info('Archived resource as: %s', saved_file)
  32. # calculate the cache_url
  33. if not context.get('cache_url_root'):
  34. log.warning('Not saved cache_url because no value for '
  35. 'ckanext-archiver.cache_url_root in config')
  36. raise ArchiveError(_('No value for ckanext-archiver.cache_url_root in config'))
  37. cache_url = urljoin(str(context['cache_url_root']),
  38. '%s/%s' % (str(relative_archive_path), str(file_name)))
  39. return {'cache_filepath': saved_file,
  40. 'cache_url': cache_url}