command.py 2.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. from ckan.lib.cli import CkanCommand
  2. from ckan.lib.uploader import get_storage_path
  3. from pylons import config
  4. from ckan import model
  5. import os
  6. import boto.s3.key as s3key
  7. import boto.s3.connection as s3connection
  8. import ckan.lib.munge as munge
  9. import logging
  10. log = logging.getLogger()
  11. class s3archiveCommand(CkanCommand):
  12. '''CKAN s3archive Extension
  13. Usage::
  14. paster s3archive archive -c <path to config file>
  15. The commands should be run from the ckanext-s3archive directory.
  16. '''
  17. summary = __doc__.split('\n')[0]
  18. usage = __doc__
  19. def command(self):
  20. '''
  21. Parse command line arguments and call appropriate method.
  22. '''
  23. if not self.args or self.args[0] in ['--help', '-h', 'help']:
  24. print (s3archiveCommand.__doc__)
  25. return
  26. cmd = self.args[0]
  27. self._load_config()
  28. if cmd == 'archive':
  29. self.archive()
  30. else:
  31. log.error('Command "%s" not recognized' % (cmd,))
  32. def archive(self):
  33. access_key = config.get('ckanext.s3archive.access_key')
  34. secret_key = config.get('ckanext.s3archive.secret_key')
  35. bucket_name = config.get('ckanext.s3archive.bucket')
  36. if not access_key:
  37. print ('ckanext.s3archive.access_key config argument not set')
  38. return
  39. if not secret_key:
  40. print ('ckanext.s3archive.secret_key config argument not set')
  41. return
  42. if not bucket_name:
  43. print ('ckanext.s3archive.bucket config argument not set')
  44. return
  45. storage_path = get_storage_path()
  46. if not storage_path:
  47. print ('ckan.storage_path not set in config')
  48. return
  49. resource_path = os.path.join(storage_path, 'resources')
  50. def walk(bucket, dir, files):
  51. for file in files:
  52. full_path = os.path.join(resource_path, dir, file)
  53. if not os.path.isfile(full_path) or full_path.endswith('~'):
  54. continue
  55. key_name = full_path[len(resource_path):]
  56. for key in bucket.list(prefix=key_name.lstrip('/')):
  57. key.delete()
  58. resource_id = key_name.replace('/', '')
  59. resource = model.Resource.get(resource_id)
  60. if not resource:
  61. continue
  62. last_part = resource.url.split('/')[-1]
  63. file_name = munge.munge_filename(last_part)
  64. key_name = key_name + '/' + file_name
  65. key = s3key.Key(bucket)
  66. key.key = key_name
  67. key.set_contents_from_filename(full_path)
  68. print ('Archived %s' % key_name)
  69. os.remove(full_path)
  70. conn = s3connection.S3Connection(access_key, secret_key)
  71. bucket = conn.get_bucket(bucket_name)
  72. try:
  73. os.path.walk(resource_path, walk, bucket)
  74. finally:
  75. conn.close()