# change_storage_class.py
#
# Copyright (C) 2012 David Raffensperger
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
# Program to synchronize a local directory with an S3 directory/bucket
# David Raffensperger
# Created May 2011
# Part of it requires PyCrypto
# To install PyCrypto on Windows, go to: http://www.voidspace.org.uk/python/modules.shtml#pycrypto
#
# Ideas:
# - Run it every day, but partition the files so that most are uploaded only once per month (would need to save
#   the date of the last upload per file; could hash names mod frequency - see the sketch after this header)
# - Check the modified times of whole directories and traverse down to find the changed files - probably doesn't
#   work reliably on Vista/NTFS
# - Compress large files
# - For big files that change a lot, split them into an archived version and a current version
# - Only upload files that have changed significantly (a big size change)
# - Ignore files over a certain size unless they are in a specified list
#
# More ideas:
# - Replace os.walk with a stat-based walk that also collects file modification times
# - Add better exception handling
# - Use the S3 multi-object delete operation
# - Open multiple TCP connections and upload several files at a time?
# - Do disk operations in parallel with network operations?
# - Consider files that get modified while the script is running
# - Process each local file as it is found instead of storing the whole local map
# - Update the cache file after every uploaded file (or every 10 or so), so progress survives a crash
# - Use a hybrid of fetching server info and using the local cache, so MD5s don't need to be corrected against
#   the server, but files can still get back in sync
# - Make a GUI with wxPython
# - Make it an open-source project on SourceForge
# - Check that deletes and copies completed successfully
# - Improve error handling; verify more against S3 and the local file system; make it more fault-tolerant
# - Better status output that shows the total amount of data to upload
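
# Hypothetical sketch of the "hash names mod frequency" idea above (assumed
# helper name; nothing in this script calls it): spread expensive re-uploads
# across the month by re-checking each file only on the day of the month that
# its hashed path maps to.
def _example_due_for_upload(path, day_of_month, frequency=28):
    import zlib
    # Mask with 0xffffffff so the CRC is non-negative on both Python 2 and 3.
    bucket_index = (zlib.crc32(path.encode('utf-8')) & 0xffffffff) % frequency
    return bucket_index == (day_of_month - 1) % frequency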
import sys
from getpass import getpass  # read passwords without echoing them
from s3_helpers import *  # provides log, close_log_file, get_remote_file_map, s3_operation, and the key helpers
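
# encrypt_secret_key/decrypt_secret_key come from s3_helpers. As a rough,
# hypothetical sketch of how they might be built on PyCrypto (which the header
# notes is required) - AES in CFB mode, keyed off a SHA-256 hash of the
# password - they could look like the underscore-prefixed examples below.
# These are illustrative assumptions only and are not used by this script.
def _example_encrypt_secret_key(password, secret_key):
    from binascii import hexlify
    from hashlib import sha256
    from Crypto import Random
    from Crypto.Cipher import AES
    key = sha256(password.encode('utf-8')).digest()  # 32-byte AES-256 key
    iv = Random.new().read(AES.block_size)           # random 16-byte IV
    cipher = AES.new(key, AES.MODE_CFB, iv)
    # Prepend the IV so decryption can recover it; hex-encode for easy pasting.
    return hexlify(iv + cipher.encrypt(secret_key.encode('utf-8'))).decode('ascii')

def _example_decrypt_secret_key(password, encrypted_key):
    from binascii import unhexlify
    from hashlib import sha256
    from Crypto.Cipher import AES
    raw = unhexlify(encrypted_key)
    key = sha256(password.encode('utf-8')).digest()
    cipher = AES.new(key, AES.MODE_CFB, raw[:AES.block_size])
    return cipher.decrypt(raw[AES.block_size:]).decode('utf-8')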
def main(args):
    if len(args) < 4:
        log("Expected parameters: access_key [secret_key|decrypt:encrypted_key|encrypt:secret_key] remote_bucket_and_prefix storage_class")
        exit(-1)
    access_key = args[0]
    secret_key = args[1]
    dec_prefix = 'decrypt:'
    enc_prefix = 'encrypt:'
    if secret_key.startswith(dec_prefix):
        log('Password to access AWS:')
        password = getpass('')
        secret_key = decrypt_secret_key(password, secret_key[len(dec_prefix):])
    elif secret_key.startswith(enc_prefix):
        log('Password to encrypt AWS secret key:')
        password = getpass('')
        secret_key = encrypt_secret_key(password, secret_key[len(enc_prefix):])
        log('Key encrypted as: ' + secret_key)
        log('Run again with decrypt:[encrypted secret key (above)] to sync to S3.')
        exit(-1)
    bucket = args[2]
    storage_class = args[3]
    remote_map = get_remote_file_map(access_key, secret_key, bucket, "")
    file_objs = list(remote_map.by_path.values())
    for file_obj in file_objs:
        if file_obj.storage_class != storage_class:
            path = "/" + bucket + "/" + file_obj.path
            # S3 changes an object's storage class via an in-place copy: PUT the
            # object onto itself with x-amz-copy-source pointing at the same key
            # and the desired x-amz-storage-class.
            amz_headers = {}
            amz_headers['x-amz-copy-source'] = path
            amz_headers['x-amz-metadata-directive'] = 'COPY'
            amz_headers['x-amz-storage-class'] = storage_class
            try:
                status = s3_operation(access_key, secret_key, "PUT", path, "", amz_headers)
                if status == 200:
                    log("Changed to " + storage_class + " storage: " + str(path))
                else:
                    log("Tried to change storage class of: " + str(path) + " but failed")
            except Exception:
                # Log and continue so one bad object doesn't abort the whole run.
                log("Error changing storage class of: " + str(path) + " : " + str(sys.exc_info()[0]))
    close_log_file()
if __name__ == "__main__":
    main(sys.argv[1:])
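
# Example invocation (hypothetical key, encrypted-key blob, and bucket values):
#   python change_storage_class.py AKIAIOSFODNN7EXAMPLE decrypt:1a2b3c... my-bucket/photos REDUCED_REDUNDANCY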