123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192 |
- import argparse,os,time,math,re
- from threading import Thread
- from boto.s3.connection import S3Connection
- from boto.s3.key import Key
- """
- With this class / command you can upload a directory recursively,
- or a single level deep, or just a single file.
- """
- """
- Upload a directory:
- s3uploader.py \
- --api "MYAPIKEY" \
- --secret "MYSECRETKEY" \
- --bucket "mybucket" \
- --bucketpath "path/on/bucket/" \
- --source "local/path/" \
- --recursive \
- --ignoredates \
- --threads 3
- """
- """
- Upload a file:
- s3uploader.py \
- --api "MYAPIKEY" \
- --secret "MYSECRETKEY" \
- --bucket "mybucket" \
- --bucketpath "path/on/bucket/myfile.json" \
- --file "local/path/myfile.json" \
- --ignoredates
- """
class S3Uploader:
    """Upload a single file, or the files of a directory, to an S3 bucket.

    Uploads go through boto's ``S3Connection`` / ``Key``.  Every uploaded key
    is stamped with a ``date`` metadata entry holding the upload time (epoch
    seconds); on later runs a file is only re-uploaded when its local
    modification time is newer than that stamp, unless dates are ignored.

    Attributes:
        api: S3 API key handed to ``S3Connection``.
        secret: S3 secret handed to ``S3Connection``.
        dryrun: When true, report what would be uploaded without writing
            anything to S3.
        filetype_meta: Maps a file extension (without the dot) to a list of
            metadata dictionaries applied to files with that extension,
            e.g. ``{"css": [{"Content-Type": "text/css"}]}``.
    """

    api = None
    secret = None
    dryrun = False
    # Class-level fallback kept for backward compatibility only; __init__
    # gives every instance its own dictionary so instances no longer share
    # (and repeatedly append to) the same metadata lists.
    filetype_meta = {}

    def __init__(self, api, secret):
        """Store the credentials and register the default Content-Type metadata."""
        self.api = api
        self.secret = secret
        self.filetype_meta = {}
        self._init_default_metas()

    def _init_default_metas(self):
        """Register a Content-Type for the file extensions known by default."""
        defaults = (
            ("css", "text/css"),
            ("html", "text/html"),
            ("js", "application/javascript"),
            ("jpg", "image/jpeg"),
            ("jpeg", "image/jpeg"),
            ("json", "application/json"),
            ("mp4", "video/mp4"),
            ("ogg", "application/ogg"),
            ("otf", "application/x-font-otf"),
            ("png", "image/png"),
            ("txt", "text/plain"),
            ("webm", "video/webm"),
            ("xml", "application/xml"),
            ("zip", "application/zip"),
        )
        for filetype, content_type in defaults:
            self.set_metadata_for_filetype(filetype, {"Content-Type": content_type})

    def set_metadata_for_filetype(self, filetype, meta):
        """Append the metadata dictionary ``meta`` to the entries for ``filetype``.

        Args:
            filetype: File extension without the leading dot (e.g. ``"css"``).
            meta: Dictionary of metadata name -> value.
        """
        self.filetype_meta.setdefault(filetype, []).append(meta)

    def get_metadata_for_filtetype(self, filetype):
        """Return the list of metadata dictionaries for ``filetype``, or None.

        The misspelled name is the original public API and is kept so that
        existing callers continue to work.
        """
        return self.filetype_meta.get(filetype, None)

    def get_metadata_for_filetype(self, filetype):
        """Correctly spelled equivalent of ``get_metadata_for_filtetype``."""
        return self.get_metadata_for_filtetype(filetype)

    @staticmethod
    def _partition_files(files, threadcount):
        """Split ``files`` into one list per worker thread.

        At least one worker is always used (a ``threadcount`` below 1 is
        treated as 1) and never more workers than there are files, so no
        worker is started with nothing to do.  Files are dealt round-robin,
        which keeps the list sizes within one of each other.

        Returns:
            A list of lists; together they contain every entry of ``files``
            exactly once.  ``files`` itself is not modified.
        """
        workers = max(1, min(int(threadcount), len(files)))
        return [files[start::workers] for start in range(workers)]

    @staticmethod
    def _should_upload(s3date, lcdate, ignoredates):
        """Decide whether a local file has to be (re-)uploaded.

        Args:
            s3date: Value of the key's ``date`` metadata (epoch seconds as a
                string or int), or None/empty when the key has no stamp.
            lcdate: Local modification time in whole epoch seconds.
            ignoredates: When true, always upload.

        Returns:
            True when the file should be uploaded.
        """
        if ignoredates:
            return True
        try:
            remote = int(s3date) if s3date else None
        except (TypeError, ValueError):
            # An unreadable stamp is treated like a missing one: upload.
            remote = None
        if not remote:
            return True
        return lcdate > remote

    def upload_dir(self, bucket, bucketbasepath, dirsource, recursive, threadcount, ignoredates):
        """Upload the files found in ``dirsource`` using worker threads.

        Args:
            bucket: Name of the target bucket.
            bucketbasepath: Prefix prepended to every key name.
            dirsource: Local directory to read files from.
            recursive: When true, descend into sub-directories.
            threadcount: Number of worker threads; values below 1 are treated
                as 1 and the count is capped at the number of files.
            ignoredates: When true, upload every file regardless of dates.

        Blocks until every worker thread has finished.
        """
        if recursive:
            files = self._get_files_recursive(bucketbasepath, dirsource)
        else:
            files = self._get_files(bucketbasepath, dirsource)

        threads = []
        for pairs in self._partition_files(files, threadcount):
            thread = Thread(
                target=self._upload_dir,
                args=(bucket, bucketbasepath, dirsource, pairs, ignoredates),
            )
            threads.append(thread)
            thread.start()
        for thread in threads:
            thread.join()

    def upload_file(self, bucket, bucketpath, filepath, ignoredates):
        """Upload the single file ``filepath`` to the key ``bucketpath``.

        Args:
            bucket: Name of the target bucket.
            bucketpath: Full key name the file is stored under.
            filepath: Local file to upload.
            ignoredates: When true, upload regardless of dates.
        """
        s3connection = S3Connection(self.api, self.secret)
        s3bucket = s3connection.get_bucket(bucket)
        fullpath = os.path.relpath(os.path.abspath(filepath))
        # The key is exactly ``bucketpath``.  The previous implementation
        # appended "/<filename>" and then stripped that same suffix again with
        # an unescaped regular expression, which only differed from this when
        # the filename contained regex metacharacters (and then misbehaved).
        self._upload_file(s3connection, s3bucket, fullpath, bucketpath, ignoredates)

    def _upload_file(self, connection, bucket, localfile, bucketfile, ignoredates):
        """Upload ``localfile`` to key ``bucketfile`` of the boto ``bucket``.

        The upload is skipped when the key's ``date`` stamp is at least as new
        as the local modification time and ``ignoredates`` is false.  In
        dry-run mode the planned upload and metadata are printed but nothing
        is written.  ``connection`` is accepted for interface compatibility
        and is not used.
        """
        s3key = bucket.get_key(bucketfile)
        if not s3key:
            s3key = Key(bucket)
            s3key.key = bucketfile

        s3date = s3key.get_metadata("date")
        lcdate = int(os.path.getmtime(localfile))
        if not self._should_upload(s3date, lcdate, ignoredates):
            return

        if self.dryrun:
            print ("dry-run. %s : %s => %s" % (bucket.name, localfile, bucketfile))
        else:
            print ("%s : %s => %s" % (bucket.name, localfile, bucketfile))

        # splitext only looks at the final path component, so a file without
        # an extension yields "" instead of (part of) its path.
        filetype = os.path.splitext(localfile)[1][1:]
        meta = self.get_metadata_for_filtetype(filetype)
        if meta:
            for metadata in meta:
                for key in metadata:
                    print (" => metdata: %s:%s" % (key, metadata[key]))
                    if not self.dryrun:
                        s3key.set_metadata(key, metadata[key])

        if not self.dryrun:
            s3key.set_metadata("date", str(int(time.time())))
            s3key.set_contents_from_filename(localfile)

    def _upload_dir(self, bucket, bucketpath, dirsource, files, ignoredates):
        """Thread target: upload every ``(localfile, bucketfile)`` pair in ``files``.

        Each call opens its own ``S3Connection`` and looks up ``bucket`` by
        name.  ``bucketpath`` and ``dirsource`` are not used here.
        """
        s3connection = S3Connection(self.api, self.secret)
        s3bucket = s3connection.get_bucket(bucket)
        for localfile, bucketfile in files:
            self._upload_file(s3connection, s3bucket, localfile, bucketfile, ignoredates)

    def _get_files(self, bucketpath, dirsource):
        """List the files directly inside ``dirsource`` (no recursion).

        ``bucketpath`` and the file name are concatenated as-is (only ``//``
        is collapsed to ``/``), so ``bucketpath`` should end with ``/``.

        Returns:
            A list of ``(local_path, bucket_key)`` tuples, sorted by file
            name; local paths are relative to the current directory.
        """
        files = []
        dirsource = os.path.relpath(os.path.abspath(dirsource))
        for filename in sorted(os.listdir(dirsource)):
            flpath = os.path.relpath(os.path.abspath(os.path.join(dirsource, filename)))
            if os.path.isdir(flpath):
                continue
            bktflname = ("%s%s" % (bucketpath, filename)).replace("//", "/")
            files.append((flpath, bktflname))
        return files

    def _get_files_recursive(self, bucketpath, dirsource):
        """List every file below ``dirsource``, descending into sub-directories.

        The key of a file is ``bucketpath`` followed by the file's path
        relative to ``dirsource`` (with a leading ``/``); ``//`` is collapsed
        to ``/``.

        Returns:
            A list of ``(local_path, bucket_key)`` tuples; local paths are
            relative to the current directory.
        """
        files = []
        dirsource = os.path.relpath(os.path.abspath(dirsource))
        for dirname, dirnames, filenames in os.walk(dirsource):
            for filename in filenames:
                localpath = os.path.join(dirname, filename)
                # os.walk prefixes every directory with ``dirsource``, so the
                # prefix can be sliced off; no regular expression is needed
                # (directory names may contain regex metacharacters).
                bucketlocal = localpath[len(dirsource):].replace(os.sep, "/")
                bucket = ("%s%s" % (bucketpath, bucketlocal)).replace("//", "/")
                files.append((localpath, bucket))
        return files
if __name__ == "__main__":
    # Command-line entry point: parse the options, then upload either a
    # directory (--source) or a single file (--file).  When both are given
    # the directory upload wins; when neither is given a usage hint is
    # printed.
    option_table = (
        (("-a", "--api"), dict(help="S3 API Key")),
        (("-s", "--secret"), dict(help="S3 Secret")),
        (("-d", "--source"), dict(help="Source directory")),
        (("-f", "--file"), dict(help="Source file to upload")),
        (("-b", "--bucket"), dict(help="S3 Bucket")),
        (("-p", "--bucketpath"), dict(help="S3 Bucket path. Use as a base path for directories. An absolute path for files.")),
        (("-r", "--recursive"), dict(action="store_true", help="Whether to recurse into local source directory")),
        (("-i", "--ignoredates"), dict(action="store_true", help="Ignore modified dates and upload all files")),
        (("-t", "--threads"), dict(default=1, type=int, help="The number of threads to use to upload files")),
    )
    cli = argparse.ArgumentParser()
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    opts = cli.parse_args()

    s3 = S3Uploader(opts.api, opts.secret)
    if opts.source:
        s3.upload_dir(
            opts.bucket,
            opts.bucketpath,
            opts.source,
            opts.recursive,
            opts.threads,
            opts.ignoredates,
        )
    elif opts.file:
        s3.upload_file(opts.bucket, opts.bucketpath, opts.file, opts.ignoredates)
    else:
        print ("py-s3uploader -h")
|