123456789101112131415161718192021222324252627282930313233343536373839404142434445 |
def archive_resource(context, resource, log, result=None, url_timeout=30):
    """
    Archive the given resource. Moves the file from the temporary location
    given in download().

    The file ends up at
    <settings.ARCHIVE_DIR>/<first two chars of id>/<id>/<file name>, where
    the file name is the last path segment of the resource's URL, or
    'resource' when the URL yields no usable segment (missing URL, empty or
    whitespace-only segment, or the special names '.' and '..').

    Params:
      context - mapping; 'cache_url_root' is read to build the cache URL
      resource - mapping with an 'id' and, optionally, a 'url'
      log - logger used for progress and error messages
      result - result of the download(), containing keys: length, saved_file.
               Only 'saved_file' is read here. Despite the None default it
               is indexed unconditionally, so callers must supply it.
      url_timeout - not used by this function

    Raises ArchiveError if context has no 'cache_url_root'; note the file has
    already been moved into the archive by then. Errors from moving or
    chmod-ing the file propagate unchanged.

    Returns: {cache_filepath, cache_url}
    """
    from ckanext.archiver import default_settings as settings

    relative_archive_path = os.path.join(resource['id'][:2], resource['id'])
    archive_dir = os.path.join(settings.ARCHIVE_DIR, relative_archive_path)
    if not os.path.exists(archive_dir):
        os.makedirs(archive_dir)

    # try to get a file name from the url
    parsed_url = urlparse(resource.get('url') or '')
    try:
        # strip first: leading/trailing spaces cause problems, and a
        # whitespace-only segment must count as "no name"
        file_name = parsed_url.path.split('/')[-1].strip()
    except Exception:
        # best effort: any oddity in the URL just means the default name
        file_name = ''
    # '' would make the destination the directory itself, and '.'/'..'
    # resolve to the archive directory or its parent; shutil.move would then
    # place the file there under its temporary name instead of the name the
    # returned path and URL refer to.
    if file_name in ('', '.', '..'):
        file_name = 'resource'

    # move the temp file to the resource's archival directory
    saved_file = os.path.join(archive_dir, file_name)
    shutil.move(result['saved_file'], saved_file)
    log.info('Going to do chmod: %s', saved_file)
    try:
        os.chmod(saved_file, 0o644)  # allow other users to read it
    except Exception as e:
        log.error('chmod failed %s: %s', saved_file, e)
        raise
    log.info('Archived resource as: %s', saved_file)

    # calculate the cache_url
    if not context.get('cache_url_root'):
        log.warning('Not saved cache_url because no value for '
                    'ckanext-archiver.cache_url_root in config')
        raise ArchiveError(_('No value for ckanext-archiver.cache_url_root in config'))
    cache_url = urljoin(str(context['cache_url_root']),
                        '%s/%s' % (str(relative_archive_path), str(file_name)))
    return {'cache_filepath': saved_file,
            'cache_url': cache_url}
|