utils_1.py 3.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
  def migrate_archiver_dirs():
      """Move archived resource files into the two-level directory layout.

      Walks every resource whose state is not DELETED and whose ``cache_url``
      is still in the old ``<base>/<resource id>/<filename>`` form. For each
      one it:

      * skips the resource if its package is deleted or if the URL base does
        not equal the configured ``ckanext-archiver.cache_url_root``;
      * moves ``<archive_dir>/<id>`` to ``<archive_dir>/<id[:2]>/<id>``;
      * rewrites ``cache_url`` and ``cache_filepath`` on the resource through
        the ``resource_update`` action.

      Progress and errors are reported with ``print``; a resource that cannot
      be migrated is skipped so that the remaining resources are still
      processed. Nothing is returned.
      """
      from ckan import model
      from ckan.logic import get_action

      site_user = get_action('get_site_user')(
          {'model': model, 'ignore_auth': True, 'defer_commit': True}, {}
      )

      site_url_base = config['ckanext-archiver.cache_url_root'].rstrip('/')
      # Old layout: <base>/<resource id>/<filename>
      old_dir_regex = re.compile(r'(.*)/([a-f0-9\-]+)/([^/]*)$')
      # New layout: <base>/<2 hex chars>/<36-char id>/<filename>
      new_dir_regex = re.compile(r'(.*)/[a-f0-9]{2}/[a-f0-9\-]{36}/[^/]*$')

      live_resources = model.Session.query(model.Resource).filter(
          model.Resource.state != model.State.DELETED)
      for resource in live_resources:
          # 'None' (the string) is treated the same as a missing cache_url.
          if not resource.cache_url or resource.cache_url == 'None':
              continue
          if new_dir_regex.match(resource.cache_url):
              print('Resource with new url already: %s' % resource.cache_url)
              continue
          match = old_dir_regex.match(resource.cache_url)
          if not match:
              print('ERROR Could not match url: %s' % resource.cache_url)
              continue
          url_base, res_id, filename = match.groups()

          # check the package isn't deleted
          # Need to refresh the resource's session
          resource = model.Session.query(model.Resource).get(resource.id)
          if p.toolkit.check_ckan_version(max_version='2.2.99'):
              # CKAN <= 2.2: the package is reached via the resource group.
              package = None
              if resource.resource_group:
                  package = resource.resource_group.package
          else:
              package = resource.package

          if package and package.state == model.State.DELETED:
              print('Package is deleted')
              continue

          if url_base != site_url_base:
              print('ERROR Base URL is incorrect: %r != %r'
                    % (url_base, site_url_base))
              continue

          # move the file
          filepath_base = config['ckanext-archiver.archive_dir']
          old_path = os.path.join(filepath_base, resource.id)
          new_dir = os.path.join(filepath_base, resource.id[:2])
          new_path = os.path.join(filepath_base, resource.id[:2], resource.id)
          new_filepath = os.path.join(new_path, filename)
          if not os.path.exists(new_dir):
              os.mkdir(new_dir)
          if os.path.exists(new_path) and not os.path.exists(old_path):
              print('File already moved: %s' % new_path)
          else:
              # NOTE(review): if both old_path and new_path exist,
              # shutil.move places old_path *inside* new_path rather than
              # replacing it - confirm whether that case needs handling.
              print('File: "%s" -> "%s"' % (old_path, new_path))
              try:
                  shutil.move(old_path, new_path)
              except (IOError, OSError, shutil.Error) as e:
                  # shutil.move may raise OSError or shutil.Error as well as
                  # IOError (these are distinct types on Python 2); catch
                  # them all so one bad resource does not abort the run.
                  print('ERROR moving resource: %s' % e)
                  continue

          # change the cache_url and cache_filepath
          new_cache_url = '/'.join((url_base, res_id[:2], res_id, filename))
          print('cache_filepath: "%s" -> "%s"'
                % (resource.extras.get('cache_filepath'), new_filepath))
          print('cache_url: "%s" -> "%s"' % (resource.cache_url, new_cache_url))
          # A fresh context is built for every resource before calling the
          # actions.
          context = {
              'model': model,
              'user': site_user['name'],
              'ignore_auth': True,
              'session': model.Session,
          }
          res_dict = get_action('resource_show')(context, {'id': resource.id})
          res_dict['cache_filepath'] = new_filepath
          res_dict['cache_url'] = new_cache_url
          result = get_action('resource_update')(context, res_dict)
          if result.get('id') == res_id:
              print('Successfully updated resource')
          else:
              print('ERROR updating resource: %r' % result)