duplicatefileremover.py 1.1 KB

import hashlib
import os


# Returns the MD5 hash string of the given file
def hashFile(filename):
    # For large files, reading everything at once can exhaust memory,
    # so the file is read in fixed-size blocks instead
    BLOCKSIZE = 65536
    hasher = hashlib.md5()
    with open(filename, 'rb') as file:
        # Read one block at a time and feed it to the hasher
        buf = file.read(BLOCKSIZE)
        while len(buf) > 0:
            hasher.update(buf)
            buf = file.read(BLOCKSIZE)
    return hasher.hexdigest()


if __name__ == "__main__":
    # Dictionary mapping each hash to the first filename seen with it
    hashMap = {}
    # List to store deleted files
    deletedFiles = []
    filelist = [f for f in os.listdir() if os.path.isfile(f)]
    for f in filelist:
        key = hashFile(f)
        # If the hash has already been seen, this file is a duplicate: delete it
        if key in hashMap:
            deletedFiles.append(f)
            os.remove(f)
        else:
            hashMap[key] = f
    if len(deletedFiles) != 0:
        print('Deleted Files')
        for i in deletedFiles:
            print(i)
    else:
        print('No duplicate files found')
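
Since the script deletes files from whatever directory it is run in, a safe way to try it is against a throwaway directory. The sketch below is one possible way to do that, assuming the script is saved as duplicatefileremover.py next to the test code; the sample filenames and contents are purely illustrative.

import os
import shutil
import subprocess
import sys
import tempfile

# Path to the remover script (an assumption; adjust to your layout)
script = os.path.abspath("duplicatefileremover.py")

workdir = tempfile.mkdtemp()
try:
    # Two identical files and one distinct file
    for name, data in [("a.txt", b"hello"), ("b.txt", b"hello"), ("c.txt", b"world")]:
        with open(os.path.join(workdir, name), "wb") as fh:
            fh.write(data)

    # Run the remover with the temp directory as its working directory,
    # since it scans os.listdir() of the current directory
    subprocess.run([sys.executable, script], cwd=workdir, check=True)

    # One of a.txt/b.txt should have been removed, leaving two files
    print(sorted(os.listdir(workdir)))
finally:
    shutil.rmtree(workdir)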