123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227 |
- import botocore
- import ckan.plugins as plugins
- import ckan.plugins.toolkit as toolkit
- import ckan.logic as logic
- from pylons import config
- import boto3
- from botocore.client import Config
- import ckan.model as model
- from ckan.common import request, c
- import ckan.lib.helpers as h
- from logging import getLogger
- import json
log = getLogger(__name__)

# User-facing error messages. The parentheses are required: they join the two
# adjacent string literals into one message. Without them the second literal
# is a separate no-op statement and the documentation link is lost.
NO_CREDENTIALS_MESSAGE = (
    "Amazon AWS credentials not set up for boto. "
    "Please refer to https://boto3.readthedocs.io/en/latest/guide/quickstart.html#configuration"
)

BAD_CREDENTIALS_MESSAGE = (
    "Amazon AWS credentials not authorized. "
    "Please refer to https://boto3.readthedocs.io/en/latest/guide/quickstart.html#configuration"
)
def get_s3_role():
    """Return the role ARN configured under ``ckanext.s3multipart.s3_role``.

    The value is read from the CKAN config on every call; ``None`` is
    returned when the option is not set.
    """
    role_arn = config.get('ckanext.s3multipart.s3_role', None)
    return role_arn
def get_s3_bucket():
    """Return the bucket name configured under ``ckanext.s3multipart.s3_bucket``.

    The value is read from the CKAN config on every call; ``None`` is
    returned when the option is not set.
    """
    bucket_name = config.get('ckanext.s3multipart.s3_bucket', None)
    return bucket_name
def get_s3_region():
    """Return the region configured under ``ckanext.s3multipart.s3_region``.

    The value is read from the CKAN config on every call; ``None`` is
    returned when the option is not set.
    """
    region_name = config.get('ckanext.s3multipart.s3_region', None)
    return region_name
def get_s3_prefix(dataset_name):
    """Build the S3 key prefix for a dataset.

    The dataset is looked up with ``package_show`` on behalf of the current
    request user. The result has the form
    ``[<configured prefix>/]<owner_org>/<dataset id>/``, where the leading
    part comes from ``ckanext.s3multipart.s3_prefix`` and is omitted when
    that option is empty.

    :param dataset_name: name or id of the dataset, passed to
        ``package_show`` as ``id``.
    :returns: the key prefix, always ending in ``/``.
    :raises: whatever ``package_show`` raises for this user/dataset.
    """
    context = {'model': model, 'session': model.Session,
               'user': c.user or c.author, 'auth_user_obj': c.userobj,
               'save': 'save' in request.params}
    dataset = toolkit.get_action('package_show')(context, {'id': dataset_name})

    prefix = config.get('ckanext.s3multipart.s3_prefix', '')
    if prefix != '':
        prefix += "/"

    # The organization segment is always included; the
    # 'ckanext.s3multipart.s3_org_prefix' switch was never wired up.
    # ``or ''`` treats a key that is present but None (a dataset with no
    # organization) like a missing key, instead of raising TypeError on
    # ``str + None``.
    owner_org = dataset.get('owner_org') or ''
    dataset_id = dataset.get('id') or ''

    return prefix + owner_org + "/" + dataset_id + "/"
def _get_policy(dataset_name):
    """Return the JSON session policy limiting access to one dataset's keys.

    The policy allows object get/put/delete and multipart operations under
    the dataset's key prefix, listing of the configured bucket restricted to
    that prefix, ``ListAllMyBuckets``, and a root listing with ``/`` as
    delimiter and no prefix.

    :param dataset_name: name or id of the dataset, forwarded to
        ``get_s3_prefix``.
    :returns: the policy document serialized as a JSON string.
    """
    # http://blogs.aws.amazon.com//security/post/Tx1P2T3LFXXCNB5/Writing-IAM-policies-Grant-access-to-user-specific-folders-in-an-Amazon-span-cla

    # Resolve bucket and prefix once: get_s3_prefix() performs a package_show
    # lookup per call, and every statement must refer to the same values.
    bucket = get_s3_bucket()
    bucket_arn = "arn:aws:s3:::" + bucket
    prefix = get_s3_prefix(dataset_name)

    return json.dumps({
        "Version": "2012-10-17",
        "Statement": [
            {
                "Sid": "AllowUserFolderOperations",
                "Effect": "Allow",
                "Action": [
                    "s3:GetObject*",
                    "s3:GetBucketLocation",
                    "s3:PutObject*",
                    "s3:DeleteObject",
                    "s3:*Multipart*"
                ],
                "Resource": bucket_arn + "/" + prefix + "*"
            },
            {
                "Sid": "AllowListingOfUserFolder",
                "Action": ["s3:ListBucket"],
                "Effect": "Allow",
                "Resource": bucket_arn,
                "Condition": {"StringLike": {"s3:prefix": [prefix]}}
            },
            {
                "Sid": "FindMyBucket",
                "Effect": "Allow",
                "Action": "s3:ListAllMyBuckets",
                "Resource": "arn:aws:s3:::*",
                "Condition": {"StringLike": {"s3:prefix": [bucket]}}
            },
            {
                "Sid": "AllowRootListingWithoutPrefix",
                "Action": [
                    "s3:ListBucket"
                ],
                "Effect": "Allow",
                "Resource": [
                    bucket_arn
                ],
                "Condition": {
                    "Null": {
                        "s3:prefix": "true"
                    },
                    "StringEquals": {
                        "s3:delimiter": [
                            "/"
                        ]
                    }
                }
            }
        ]
    })
def get_session_credentials(dataset_name):
    """Obtain temporary AWS credentials scoped to one dataset's S3 prefix.

    The dataset must belong to an organization listed in
    ``ckanext.s3multipart.enabled_orgs`` and the current user must pass the
    ``package_create`` and ``package_update`` access checks. The configured
    role is then assumed through STS for six hours with the policy from
    ``_get_policy`` attached.

    :param dataset_name: name or id of the dataset.
    :returns: the ``assume_role`` response with
        ``['Credentials']['Expiration']`` converted to ``str``, or a dict
        ``{'error': <message>}`` when the request cannot be served.
    """
    if dataset_name == '':
        return {'error': 'no dataset name/id specified'}

    if c.pkg_dict:
        pkg_dict = c.pkg_dict
    else:
        context = {'model': model, 'session': model.Session,
                   'user': c.user or c.author, 'auth_user_obj': c.userobj,
                   'save': 'save' in request.params}
        data_dict = {'id': dataset_name, 'include_tracking': False}
        pkg_dict = logic.get_action('package_show')(context, data_dict)

    # 'organization' may be missing or present with a None value (dataset
    # without an organization); both cases mean "not enabled", not a crash.
    organization = (pkg_dict or {}).get('organization') or {}
    enabled_orgs = config.get('ckanext.s3multipart.enabled_orgs', '').split()
    if organization.get('name', None) not in enabled_orgs:
        return {'error': 'organization not activated for s3 use'}

    # A fresh context is built for the access checks so they do not see
    # anything package_show may have added to the one used above.
    context = {'model': model, 'session': model.Session,
               'user': c.user or c.author, 'auth_user_obj': c.userobj,
               'save': 'save' in request.params}
    try:
        logic.check_access('package_create', context)
        logic.check_access('package_update', context, {'id': dataset_name})

        sts_connection = boto3.Session().client('sts')
        # The session name is truncated to 32 characters.
        session_name = (c.user + "@" + config.get('ckan.site_id', ''))[:32]
        assume_role_object = sts_connection.assume_role(
            RoleArn=get_s3_role(),
            RoleSessionName=session_name,
            DurationSeconds=(60 * 60 * 6),
            Policy=_get_policy(dataset_name))

        # Expiration is stringified before the response is returned.
        credentials = assume_role_object['Credentials']
        credentials['Expiration'] = str(credentials['Expiration'])
        return assume_role_object
    except botocore.exceptions.NoCredentialsError:
        log.error(NO_CREDENTIALS_MESSAGE)
        h.flash_error(NO_CREDENTIALS_MESSAGE)
        return {'error': NO_CREDENTIALS_MESSAGE}
    except logic.NotAuthorized:
        # NOTE(review): this is a CKAN authorization failure, yet the message
        # talks about AWS credentials; wording kept for compatibility.
        log.error(BAD_CREDENTIALS_MESSAGE)
        h.flash_error(BAD_CREDENTIALS_MESSAGE)
        return {'error': BAD_CREDENTIALS_MESSAGE}
def get_presigned_post(dataset_name):
    """Create a presigned S3 POST for uploading files into a dataset's prefix.

    The dataset must belong to an organization listed in
    ``ckanext.s3multipart.enabled_orgs`` and the current user must pass the
    ``package_create`` and ``package_update`` access checks. The generated
    POST is valid for one hour and forces the ``public-read`` ACL.

    :param dataset_name: name or id of the dataset.
    :returns: the dict from ``generate_presigned_post`` extended with a
        ``curl_example`` entry, or a dict ``{'error': <message>}`` when the
        request cannot be served.
    """
    if dataset_name == '':
        return {'error': 'no dataset name/id specified'}

    try:
        context = {'model': model, 'session': model.Session,
                   'user': c.user or c.author, 'auth_user_obj': c.userobj,
                   'save': 'save' in request.params}
        data_dict = {'id': dataset_name, 'include_tracking': False}
        pkg_dict = logic.get_action('package_show')(context, data_dict)

        # 'organization' may be missing or present with a None value (dataset
        # without an organization); both cases mean "not enabled", not a crash.
        organization = (pkg_dict or {}).get('organization') or {}
        enabled_orgs = config.get('ckanext.s3multipart.enabled_orgs', '').split()
        if organization.get('name', None) not in enabled_orgs:
            return {'error': 'organization not activated for s3 use'}

        logic.check_access('package_create', context)
        logic.check_access('package_update', context, {'id': dataset_name})

        s3 = boto3.client('s3', region_name=get_s3_region(),
                          config=Config(signature_version='s3v4'))

        # Make sure everything posted is publicly readable
        fields = {"acl": "public-read"}

        # Ensure that the ACL isn't changed
        conditions = [
            {"acl": "public-read"},
            # ["content-length-range", 10, 100]
        ]

        # Generate the POST attributes
        post = s3.generate_presigned_post(
            Bucket=get_s3_bucket(),
            Key=get_s3_prefix(dataset_name) + "${filename}",
            Fields=fields, Conditions=conditions, ExpiresIn=3600)

        # demonstrate an example using curl command line tool
        #
        # make sure the file is at the end of the POST payload
        # else you get "Bucket POST must contain a field named 'key'.
        # If it is specified, please check the order of the fields."
        #
        # '$' is backslash-escaped so the literal "${filename}" placeholder
        # survives inside the double-quoted shell argument.
        form_args = ''.join(
            ' -F "%s=%s" ' % (name, value.replace('$', '\\$'))
            for name, value in post['fields'].items())
        post['curl_example'] = ('curl -v ' + form_args +
                                ' -F "file=@filename" %s' % post['url'])
        return post
    except logic.NotFound:
        return {'error': 'dataset not found'}
    except botocore.exceptions.NoCredentialsError:
        log.error(NO_CREDENTIALS_MESSAGE)
        h.flash_error(NO_CREDENTIALS_MESSAGE)
        return {'error': NO_CREDENTIALS_MESSAGE}
    except logic.NotAuthorized:
        # NOTE(review): this is a CKAN authorization failure, yet the message
        # talks about AWS credentials; wording kept for compatibility.
        log.error(BAD_CREDENTIALS_MESSAGE)
        h.flash_error(BAD_CREDENTIALS_MESSAGE)
        return {'error': BAD_CREDENTIALS_MESSAGE}
class S3MultipartPlugin(plugins.SingletonPlugin):
    """CKAN plugin exposing the S3 routes, template helpers and static assets."""

    plugins.implements(plugins.IConfigurer)
    plugins.implements(plugins.ITemplateHelpers)
    plugins.implements(plugins.IRoutes, inherit=True)

    # IRoutes

    def before_map(self, map):
        """Connect the two S3 API URLs to the extension's controller.

        :param map: the routes mapper; it is returned after the routes
            have been added.
        """
        controller = 'ckanext.s3multipart.controller:S3MultipartController'
        routes = (
            ('/api/3/action/get_s3_auth/{dataset}', 's3_auth'),
            ('/api/3/action/get_s3_post/{dataset}', 's3_post'),
        )
        for url, action in routes:
            map.connect(url, controller=controller, action=action)
        return map

    # ITemplateHelpers

    def get_helpers(self):
        """Return the helper functions made available to templates, by name."""
        helpers = {}
        helpers['get_s3_bucket'] = get_s3_bucket
        helpers['get_s3_region'] = get_s3_region
        helpers['get_s3_prefix'] = get_s3_prefix
        helpers['get_session_credentials'] = get_session_credentials
        return helpers

    # IConfigurer

    def update_config(self, config_):
        """Register the extension's template, public and fanstatic directories.

        :param config_: the config object passed in by CKAN, forwarded to
            the toolkit registration calls.
        """
        toolkit.add_template_directory(config_, 'templates')
        toolkit.add_public_directory(config_, 'public')
        toolkit.add_resource('fanstatic', 'ckanext-s3multipart')
|