def migrate_archiver_dirs():
    """Relocate archived resource directories into id-prefix subdirectories.

    For every non-deleted resource whose ``cache_url`` still has the form
    ``<base>/<resource id>/<filename>``, this:

    * moves ``<archive_dir>/<resource id>`` to
      ``<archive_dir>/<first two characters of id>/<resource id>``, and
    * rewrites the resource's ``cache_url`` and ``cache_filepath`` through
      the ``resource_update`` action so they point at the new location.

    Resources are skipped (with a printed message where noted in the code)
    when they have no cache URL, already use the new URL form, have a URL
    that cannot be parsed, belong to a deleted package, have a URL base
    different from the configured ``ckanext-archiver.cache_url_root``, or
    when moving the directory raises ``IOError``.

    Progress and errors are reported with ``print``; nothing is returned.
    """
    from ckan import model
    from ckan.logic import get_action

    site_user = get_action('get_site_user')(
        {'model': model, 'ignore_auth': True, 'defer_commit': True}, {}
    )

    site_url_base = config['ckanext-archiver.cache_url_root'].rstrip('/')
    old_dir_regex = re.compile(r'(.*)/([a-f0-9\-]+)/([^/]*)$')
    new_dir_regex = re.compile(r'(.*)/[a-f0-9]{2}/[a-f0-9\-]{36}/[^/]*$')

    live_resources = model.Session.query(model.Resource).filter(
        model.Resource.state != model.State.DELETED)

    for row in live_resources:
        cache_url = row.cache_url

        # Nothing archived for this resource (the literal string 'None' is
        # treated the same as an empty value).
        if not cache_url or cache_url == 'None':
            continue

        if new_dir_regex.match(cache_url):
            print('Resource with new url already: %s' % cache_url)
            continue

        old_match = old_dir_regex.match(cache_url)
        if not old_match:
            print('ERROR Could not match url: %s' % cache_url)
            continue
        url_base, res_id, filename = old_match.groups()

        # Fetch the resource again through the session before touching its
        # relationships, then make sure its package has not been deleted.
        resource = model.Session.query(model.Resource).get(row.id)
        if p.toolkit.check_ckan_version(max_version='2.2.99'):
            # On CKAN <= 2.2.x the package is reached via the resource group.
            group = resource.resource_group
            package = group.package if group else None
        else:
            package = resource.package

        if package and package.state == model.State.DELETED:
            print('Package is deleted')
            continue

        if url_base != site_url_base:
            print('ERROR Base URL is incorrect: %r != %r'
                  % (url_base, site_url_base))
            continue

        # Work out the old and new on-disk locations.
        filepath_base = config['ckanext-archiver.archive_dir']
        old_path = os.path.join(filepath_base, resource.id)
        new_dir = os.path.join(filepath_base, resource.id[:2])
        new_path = os.path.join(new_dir, resource.id)
        new_filepath = os.path.join(new_path, filename)

        if not os.path.exists(new_dir):
            os.mkdir(new_dir)

        already_moved = (os.path.exists(new_path)
                         and not os.path.exists(old_path))
        if not already_moved:
            print('File: "%s" -> "%s"' % (old_path, new_path))
            try:
                shutil.move(old_path, new_path)
            except IOError as e:
                print('ERROR moving resource: %s' % e)
                continue
        else:
            print('File already moved: %s' % new_path)

        # Point the resource's cache_url and cache_filepath at the new place.
        new_cache_url = '/'.join((url_base, res_id[:2], res_id, filename))
        print('cache_filepath: "%s" -> "%s"'
              % (resource.extras.get('cache_filepath'), new_filepath))
        print('cache_url: "%s" -> "%s"' % (resource.cache_url, new_cache_url))

        context = {
            'model': model,
            'user': site_user['name'],
            'ignore_auth': True,
            'session': model.Session,
        }
        res_dict = get_action('resource_show')(context, {'id': resource.id})
        res_dict['cache_filepath'] = new_filepath
        res_dict['cache_url'] = new_cache_url
        result = get_action('resource_update')(context, res_dict)

        if result.get('id') == res_id:
            print('Successfully updated resource')
        else:
            print('ERROR updating resource: %r' % result)