# py-s3uploader.py — upload a file or directory (optionally recursive, threaded) to S3.
import argparse
import math
import os
import re
import time
from threading import Thread

from boto.s3.connection import S3Connection
from boto.s3.key import Key
"""
With this class / command you can upload a directory recursively,
or a single level deep, or just a single file.
"""
"""
Upload a directory:
s3uploader.py \
--api "MYAPIKEY" \
--secret "MYSECRETKEY" \
--bucket "mybucket" \
--bucketpath "path/on/bucket/" \
--source "local/path/" \
--recursive \
--ignoredates \
--threads 3
"""
"""
Upload a file:
s3uploader.py \
--api "MYAPIKEY" \
--secret "MYSECRETKEY" \
--bucket "mybucket" \
--bucketpath "path/on/bucket/myfile.json" \
--source "local/path/myfile.json" \
--ignoredates
"""
  31. class S3Uploader:
  32. api = None
  33. secret = None
  34. dryrun = False
  35. filetype_meta = {} #=> dictionary of array of dictionaries. Example: ({"css":[{"content-type":"text/css"}]})
  36. def __init__(self,api,secret):
  37. self.api = api
  38. self.secret = secret
  39. self._init_default_metas()
  40. def _init_default_metas(self):
  41. self.set_metadata_for_filetype("css",{"Content-Type":"text/css"})
  42. self.set_metadata_for_filetype("html",{"Content-Type":"text/html"})
  43. self.set_metadata_for_filetype("js",{"Content-Type":"application/javascript"})
  44. self.set_metadata_for_filetype("jpg",{"Content-Type":"image/jpeg"})
  45. self.set_metadata_for_filetype("jpeg",{"Content-Type":"image/jpeg"})
  46. self.set_metadata_for_filetype("json",{"Content-Type":"application/json"})
  47. self.set_metadata_for_filetype("mp4",{"Content-Type":"video/mp4"})
  48. self.set_metadata_for_filetype("ogg",{"Content-Type":"application/ogg"})
  49. self.set_metadata_for_filetype("otf",{"Content-Type":"application/x-font-otf"})
  50. self.set_metadata_for_filetype("png",{"Content-Type":"image/png"})
  51. self.set_metadata_for_filetype("txt",{"Content-Type":"text/plain"})
  52. self.set_metadata_for_filetype("webm",{"Content-Type":"video/webm"})
  53. self.set_metadata_for_filetype("xml",{"Content-Type":"application/xml"})
  54. self.set_metadata_for_filetype("zip",{"Content-Type":"application/zip"})
  55. def set_metadata_for_filetype(self,filetype,meta):
  56. mta = self.get_metadata_for_filtetype(filetype)
  57. if mta:
  58. mta.append(meta)
  59. else:
  60. self.filetype_meta[filetype] = []
  61. self.filetype_meta[filetype].append(meta)
  62. def get_metadata_for_filtetype(self,filetype):
  63. return self.filetype_meta.get(filetype,None)
  64. def upload_dir(self,bucket,bucketbasepath,dirsource,recursive,threadcount,ignoredates):
  65. files = []
  66. if recursive: files = self._get_files_recursive(bucketbasepath,dirsource)
  67. else: files = self._get_files(bucketbasepath,dirsource)
  68. threads = []
  69. if(threadcount > 1):
  70. c = math.ceil(float(len(files))/threadcount)
  71. thread_pairs = []
  72. for i in range(int(threadcount)):
  73. thread_pairs.append([])
  74. for j in range(int(c)):
  75. try:
  76. pair = files.pop()
  77. thread_pairs[i].append(pair)
  78. except Exception as e:
  79. pass
  80. for i in range(threadcount):
  81. pairs = thread_pairs[i]
  82. thread = Thread(target=self._upload_dir,args=(bucket,bucketbasepath,dirsource,pairs,ignoredates))
  83. threads.append(thread)
  84. thread.start()
  85. elif threadcount == 1:
  86. thread = Thread(target=self._upload_dir,args=(bucket,bucketbasepath,dirsource,files,ignoredates))
  87. threads.append(thread)
  88. thread.start()
  89. for thread in threads:
  90. thread.join()
  91. def upload_file(self,bucket,bucketpath,filepath,ignoredates):
  92. s3connection = S3Connection(self.api,self.secret)
  93. s3bucket = s3connection.get_bucket(bucket)
  94. fullpath = os.path.relpath(os.path.abspath(filepath))
  95. filename = fullpath.split("/")[-1]
  96. bucketfile = "%s/%s" % (bucketpath,filename)
  97. bucketfile = re.sub("/%s$"%filename,"",bucketfile)
  98. self._upload_file(s3connection,s3bucket,fullpath,bucketfile,ignoredates)
  99. def _upload_file(self,connection,bucket,localfile,bucketfile,ignoredates):
  100. s3key = bucket.get_key(bucketfile)
  101. if not s3key:
  102. s3key = Key(bucket)
  103. s3key.key = bucketfile
  104. s3date = s3key.get_metadata("date")
  105. if s3date: s3date = int(s3date)
  106. lcdate = int(os.path.getmtime(localfile))
  107. upload = False
  108. if not s3date: upload = True
  109. if s3date and lcdate > s3date: upload = True
  110. if ignoredates: upload = True
  111. if not upload: return ## don't upload, return
  112. if self.dryrun: print ("dry-run. %s : %s => %s" % (bucket.name,localfile,bucketfile))
  113. else: print ("%s : %s => %s" % (bucket.name,localfile,bucketfile))
  114. filetype = localfile.split(".")[-1]
  115. meta = self.get_metadata_for_filtetype(filetype)
  116. if meta:
  117. for metadata in meta:
  118. for key in metadata:
  119. print (" => metdata: %s:%s" % (key,metadata[key]))
  120. if not self.dryrun:
  121. s3key.set_metadata(key,metadata[key])
  122. if not self.dryrun:
  123. s3key.set_metadata("date",str(int(time.time())))
  124. s3key.set_contents_from_filename(localfile)
  125. def _upload_dir(self,bucket,bucketpath,dirsource,files,ignoredates):
  126. s3connection = S3Connection(self.api,self.secret)
  127. s3bucket = s3connection.get_bucket(bucket)
  128. for pair in files:
  129. localfile = pair[0]
  130. bucketfile = pair[1]
  131. self._upload_file(s3connection,s3bucket,localfile,bucketfile,ignoredates)
  132. def _get_files(self,bucketpath,dirsource):
  133. #returns a list of tuples = >[(localFilePath,bucketPath), (localFilepath,bucketPath)]
  134. #this only reads files in the source directory and isn't recursive
  135. files = []
  136. dirsource = os.path.relpath(os.path.abspath(dirsource))
  137. for filename in os.listdir(dirsource):
  138. flpath = os.path.join(dirsource,filename)
  139. abspath = os.path.abspath(flpath)
  140. flpath = os.path.relpath(abspath)
  141. if os.path.isdir(flpath): continue
  142. bktflname = "%s%s" % (bucketpath,filename)
  143. bktflname = re.sub("//","/",bktflname)
  144. files.append((flpath,bktflname))
  145. return files
  146. def _get_files_recursive(self,bucketpath,dirsource):
  147. #returns a list of tuples = >[(localFilePath,bucketPath), (localFilepath,bucketPath)]
  148. #but this also recurses into directories.
  149. files = []
  150. dirsource = os.path.relpath(os.path.abspath(dirsource))
  151. for dirname, dirnames, filenames in os.walk(dirsource):
  152. for filename in filenames:
  153. localpath = os.path.join(dirname,filename)
  154. bucketlocal = re.sub("^"+dirsource,"",localpath)
  155. bucket = "%s%s" % (bucketpath,bucketlocal)
  156. bucket = re.sub("//","/",bucket)
  157. files.append((localpath,bucket))
  158. return files
  159. if __name__ == "__main__":
  160. parser = argparse.ArgumentParser()
  161. parser.add_argument("-a","--api",help="S3 API Key")
  162. parser.add_argument("-s","--secret",help="S3 Secret")
  163. parser.add_argument("-d","--source",help="Source directory")
  164. parser.add_argument("-f","--file",help="Source file to upload")
  165. parser.add_argument("-b","--bucket",help="S3 Bucket")
  166. parser.add_argument("-p","--bucketpath",help="S3 Bucket path. Use as a base path for directories. An absolute path for files.")
  167. parser.add_argument("-r","--recursive",action="store_true",help="Whether to recurse into local source directory")
  168. parser.add_argument("-i","--ignoredates",action="store_true",help="Ignore modified dates and upload all files")
  169. parser.add_argument("-t","--threads",default=1,type=int,help="The number of threads to use to upload files")
  170. args = parser.parse_args()
  171. uploader = S3Uploader(args.api,args.secret)
  172. if args.source:
  173. uploader.upload_dir(args.bucket,args.bucketpath,args.source,args.recursive,args.threads,args.ignoredates)
  174. elif args.file:
  175. uploader.upload_file(args.bucket,args.bucketpath,args.file,args.ignoredates)
  176. else:
  177. print ("py-s3uploader -h")