plugin.py 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. import botocore
  2. import ckan.plugins as plugins
  3. import ckan.plugins.toolkit as toolkit
  4. import ckan.logic as logic
  5. from pylons import config
  6. import boto3
  7. from botocore.client import Config
  8. import ckan.model as model
  9. from ckan.common import request, c
  10. import ckan.lib.helpers as h
  11. from logging import getLogger
  12. import json
  # Module-level logger named after this module.
  log = getLogger(__name__)

  # User-facing error messages. Each message is built from two adjacent string
  # literals; the parentheses are required so that the second literal (the
  # documentation link) is actually concatenated onto the first instead of
  # being a separate, discarded expression statement.
  NO_CREDENTIALS_MESSAGE = (
      "Amazon AWS credentials not set up for boto. "
      "Please refer to https://boto3.readthedocs.io/en/latest/guide/quickstart.html#configuration"
  )
  BAD_CREDENTIALS_MESSAGE = (
      "Amazon AWS credentials not authorized. "
      "Please refer to https://boto3.readthedocs.io/en/latest/guide/quickstart.html#configuration"
  )
  def get_s3_role():
      """Return the ``ckanext.s3multipart.s3_role`` config value.

      The value is passed as ``RoleArn`` when assuming a role in
      ``get_session_credentials``. Returns None when the option is not set.
      """
      role = config.get('ckanext.s3multipart.s3_role', None)
      return role
  def get_s3_bucket():
      """Return the ``ckanext.s3multipart.s3_bucket`` config value.

      Returns None when the option is not set.
      """
      bucket = config.get('ckanext.s3multipart.s3_bucket', None)
      return bucket
  def get_s3_region():
      """Return the ``ckanext.s3multipart.s3_region`` config value.

      Returns None when the option is not set.
      """
      region = config.get('ckanext.s3multipart.s3_region', None)
      return region
  def get_s3_prefix(dataset_name):
      """Build the S3 key prefix for a dataset.

      The result has the form ``[<configured prefix>/]<owner_org>/<id>/``,
      where the optional leading part comes from the
      ``ckanext.s3multipart.s3_prefix`` config option and ``owner_org`` / ``id``
      are read from the dataset returned by the ``package_show`` action.

      :param dataset_name: dataset name or id, passed as ``id`` to
          ``package_show``.
      :returns: the prefix string, always ending in ``/``.
      :raises: whatever ``package_show`` raises (it is not caught here).
      """
      context = {'model': model, 'session': model.Session,
                 'user': c.user or c.author, 'auth_user_obj': c.userobj,
                 'save': 'save' in request.params}
      dataset = toolkit.get_action('package_show')(context, {'id': dataset_name})

      prefix = config.get('ckanext.s3multipart.s3_prefix', '')
      if prefix != '':
          prefix = prefix + "/"

      # The organisation segment is always included. The original code gated it
      # on a hard-coded ``org_prefix = True`` (a commented-out
      # ``ckanext.s3multipart.s3_org_prefix`` option), so the check was a no-op.
      # ``or ''`` covers a key that is present but None, which would otherwise
      # raise TypeError on concatenation; a missing key behaves as before.
      owner_org = dataset.get('owner_org') or ''
      dataset_id = dataset.get('id') or ''
      return prefix + owner_org + "/" + dataset_id + "/"
  def _get_policy(dataset_name):
      """Return a JSON-encoded IAM policy scoped to one dataset's S3 prefix.

      The policy allows object get/put/delete and multipart operations under
      ``<bucket>/<dataset prefix>*``, listing the bucket for that prefix,
      ``s3:ListAllMyBuckets``, and listing the bucket root with ``/`` as
      delimiter and no prefix.

      :param dataset_name: dataset name or id, forwarded to ``get_s3_prefix``.
      :returns: the policy document as a JSON string.
      """
      # http://blogs.aws.amazon.com//security/post/Tx1P2T3LFXXCNB5/Writing-IAM-policies-Grant-access-to-user-specific-folders-in-an-Amazon-span-cla

      # Look both values up once: get_s3_prefix() runs a package_show action on
      # every call, and using a single value guarantees all statements agree.
      bucket = get_s3_bucket()
      bucket_arn = "arn:aws:s3:::" + bucket
      prefix = get_s3_prefix(dataset_name)

      return json.dumps({
          "Version": "2012-10-17",
          "Statement": [
              {
                  "Sid": "AllowUserFolderOperations",
                  "Effect": "Allow",
                  "Action": [
                      "s3:GetObject*",
                      "s3:GetBucketLocation",
                      "s3:PutObject*",
                      "s3:DeleteObject",
                      "s3:*Multipart*"
                  ],
                  "Resource": bucket_arn + "/" + prefix + "*"
              },
              {
                  "Sid": "AllowListingOfUserFolder",
                  "Action": ["s3:ListBucket"],
                  "Effect": "Allow",
                  "Resource": bucket_arn,
                  "Condition": {"StringLike": {"s3:prefix": [prefix]}}
              },
              {
                  "Sid": "FindMyBucket",
                  "Effect": "Allow",
                  "Action": "s3:ListAllMyBuckets",
                  "Resource": "arn:aws:s3:::*",
                  "Condition": {"StringLike": {"s3:prefix": [bucket]}}
              },
              {
                  "Sid": "AllowRootListingWithoutPrefix",
                  "Action": [
                      "s3:ListBucket"
                  ],
                  "Effect": "Allow",
                  "Resource": [
                      bucket_arn
                  ],
                  "Condition": {
                      "Null": {
                          "s3:prefix": "true"
                      },
                      "StringEquals": {
                          "s3:delimiter": [
                              "/"
                          ]
                      }
                  }
              }
          ]
      })
  def get_session_credentials(dataset_name):
      """Obtain temporary AWS credentials restricted to one dataset's S3 prefix.

      Checks that the dataset's organization is listed in the
      ``ckanext.s3multipart.enabled_orgs`` config option and that the current
      user passes the ``package_create`` and ``package_update`` access checks,
      then calls STS ``assume_role`` with the policy from ``_get_policy``.

      :param dataset_name: dataset name or id.
      :returns: the ``assume_role`` response (with the credentials'
          ``Expiration`` converted to a string) on success, otherwise a dict
          with a single ``error`` key describing the failure.
      """
      if dataset_name == '':
          return {'error': 'no dataset name/id specified'}

      def new_context():
          # A fresh context dict for each use, as the original code built one
          # for package_show and a separate one for the access checks.
          return {'model': model, 'session': model.Session,
                  'user': c.user or c.author, 'auth_user_obj': c.userobj,
                  'save': 'save' in request.params}

      # The dataset lookup lives inside the try block so that NotFound and
      # NotAuthorized from package_show yield an error dict, consistent with
      # get_presigned_post, instead of propagating to the caller.
      try:
          if c.pkg_dict:
              pkg_dict = c.pkg_dict
          else:
              data_dict = {'id': dataset_name, 'include_tracking': False}
              pkg_dict = logic.get_action('package_show')(new_context(), data_dict)

          # Treat a missing or None 'organization' the same way: not enabled.
          organization = (pkg_dict or {}).get('organization') or {}
          enabled_orgs = config.get('ckanext.s3multipart.enabled_orgs', '').split()
          if organization.get('name', None) not in enabled_orgs:
              return {'error': 'organization not activated for s3 use'}

          context = new_context()
          logic.check_access('package_create', context)
          logic.check_access('package_update', context, {'id': dataset_name})

          sts_connection = boto3.Session().client('sts')
          assume_role_object = sts_connection.assume_role(
              RoleArn=get_s3_role(),
              # Session name is "<user>@<site id>", truncated to 32 characters.
              RoleSessionName=(c.user + "@" + config.get('ckan.site_id', ''))[:32],
              DurationSeconds=(60 * 60 * 6),  # six hours
              Policy=_get_policy(dataset_name))
          # Stringify the expiry; presumably so the response can be serialised
          # by the caller -- TODO confirm.
          assume_role_object['Credentials']['Expiration'] = str(
              assume_role_object['Credentials']['Expiration'])
          return assume_role_object
      except logic.NotFound:
          return {'error': 'dataset not found'}
      except botocore.exceptions.NoCredentialsError:
          log.error(NO_CREDENTIALS_MESSAGE)
          h.flash_error(NO_CREDENTIALS_MESSAGE)
          return {'error': NO_CREDENTIALS_MESSAGE}
      except logic.NotAuthorized:
          log.error(BAD_CREDENTIALS_MESSAGE)
          h.flash_error(BAD_CREDENTIALS_MESSAGE)
          return {'error': BAD_CREDENTIALS_MESSAGE}
  def get_presigned_post(dataset_name):
      """Generate a presigned S3 POST for uploading files to a dataset's prefix.

      Checks that the dataset's organization is listed in the
      ``ckanext.s3multipart.enabled_orgs`` config option and that the current
      user passes the ``package_create`` and ``package_update`` access checks,
      then asks S3 for a presigned POST valid for 3600 seconds with the
      ``public-read`` ACL.

      :param dataset_name: dataset name or id.
      :returns: the dict from ``generate_presigned_post`` with an added
          ``curl_example`` entry, or a dict with a single ``error`` key on
          failure.
      """
      if dataset_name == '':
          return {'error': 'no dataset name/id specified'}
      try:
          context = {'model': model, 'session': model.Session,
                     'user': c.user or c.author, 'auth_user_obj': c.userobj,
                     'save': 'save' in request.params}
          data_dict = {'id': dataset_name, 'include_tracking': False}
          pkg_dict = logic.get_action('package_show')(context, data_dict)

          # Treat a missing or None 'organization' the same way: not enabled.
          organization = (pkg_dict or {}).get('organization') or {}
          enabled_orgs = config.get('ckanext.s3multipart.enabled_orgs', '').split()
          if organization.get('name', None) not in enabled_orgs:
              return {'error': 'organization not activated for s3 use'}

          logic.check_access('package_create', context)
          logic.check_access('package_update', context, {'id': dataset_name})

          s3 = boto3.client('s3', region_name=get_s3_region(),
                            config=Config(signature_version='s3v4'))

          # Make sure everything posted is publicly readable
          fields = {"acl": "public-read"}
          # Ensure that the ACL isn't changed
          # (a size limit could be added here, e.g. ["content-length-range", 10, 100])
          conditions = [
              {"acl": "public-read"},
          ]

          # Generate the POST attributes
          post = s3.generate_presigned_post(
              Bucket=get_s3_bucket(),
              Key=get_s3_prefix(dataset_name) + "${filename}",
              Fields=fields, Conditions=conditions, ExpiresIn=3600)

          # demonstrate an example using curl command line tool
          #
          # make sure the file is at the end of the POST payload
          # else you get "Bucket POST must contain a field named 'key'.
          # If it is specified, please check the order of the fields."
          #
          # '$' is backslash-escaped because the values are placed inside
          # double quotes in the example command.
          curl_parts = ['curl -v ']
          curl_parts.extend(
              ' -F "%s=%s" ' % (k, v.replace('$', '\\$'))
              for k, v in post['fields'].items())
          curl_parts.append(' -F "file=@filename" %s' % post['url'])
          post['curl_example'] = ''.join(curl_parts)
          return post
      except logic.NotFound:
          return {'error': 'dataset not found'}
      except botocore.exceptions.NoCredentialsError:
          log.error(NO_CREDENTIALS_MESSAGE)
          h.flash_error(NO_CREDENTIALS_MESSAGE)
          return {'error': NO_CREDENTIALS_MESSAGE}
      except logic.NotAuthorized:
          log.error(BAD_CREDENTIALS_MESSAGE)
          h.flash_error(BAD_CREDENTIALS_MESSAGE)
          return {'error': BAD_CREDENTIALS_MESSAGE}
  class S3MultipartPlugin(plugins.SingletonPlugin):
      """CKAN plugin wiring up the S3 routes, template helpers and assets."""

      plugins.implements(plugins.IConfigurer)
      plugins.implements(plugins.ITemplateHelpers)
      plugins.implements(plugins.IRoutes, inherit=True)

      # IRoutes
      def before_map(self, map):
          """Register the ``get_s3_auth`` and ``get_s3_post`` routes on *map*.

          Both routes are handled by ``S3MultipartController``; the mapper is
          returned after the routes are connected.
          """
          controller_path = 'ckanext.s3multipart.controller:S3MultipartController'
          routes = (
              ('/api/3/action/get_s3_auth/{dataset}', 's3_auth'),
              ('/api/3/action/get_s3_post/{dataset}', 's3_post'),
          )
          for url, action_name in routes:
              map.connect(url, controller=controller_path, action=action_name)
          return map

      ## ITemplateHelpers
      def get_helpers(self):
          """Return the mapping of helper names to the functions they expose."""
          helpers = {}
          helpers['get_s3_bucket'] = get_s3_bucket
          helpers['get_s3_region'] = get_s3_region
          helpers['get_s3_prefix'] = get_s3_prefix
          helpers['get_session_credentials'] = get_session_credentials
          return helpers

      # IConfigurer
      def update_config(self, config_):
          """Register this extension's templates, public files and resources."""
          toolkit.add_template_directory(config_, 'templates')
          toolkit.add_public_directory(config_, 'public')
          toolkit.add_resource('fanstatic', 'ckanext-s3multipart')