import hashlib
import os


# Return the hex digest of the given file's contents.
def hashFile(filename):
    # Reading a large file all at once can exhaust memory,
    # so read and hash it one fixed-size block at a time.
    BLOCKSIZE = 65536
    hasher = hashlib.md5()
    with open(filename, 'rb') as file:
        buf = file.read(BLOCKSIZE)
        while buf:
            hasher.update(buf)
            buf = file.read(BLOCKSIZE)
    return hasher.hexdigest()
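
# Note: on Python 3.11+ the standard-library helper hashlib.file_digest
# performs the same chunked read internally, so an equivalent version of
# hashFile (shown only as an aside, not used below) could be:
def hashFileStdlib(filename):
    with open(filename, 'rb') as file:
        return hashlib.file_digest(file, 'md5').hexdigest()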

if __name__ == "__main__":
    # Map each hash to the first filename seen with that content.
    hashMap = {}
    # Names of the duplicate files that were deleted.
    deletedFiles = []
    filelist = [f for f in os.listdir() if os.path.isfile(f)]
    for f in filelist:
        key = hashFile(f)
        # A hash already in the map means identical contents,
        # so delete this later copy as a duplicate.
        if key in hashMap:
            deletedFiles.append(f)
            os.remove(f)
        else:
            hashMap[key] = f
    if deletedFiles:
        print('Deleted Files')
        for name in deletedFiles:
            print(name)
    else:
        print('No duplicate files found')
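
To try the script out safely, one option is a throwaway directory containing a known duplicate. The sketch below assumes the code above has been saved as remove_duplicates.py (that filename is an assumption) and uses only the standard library:

import os
import subprocess
import sys
import tempfile

# Path to the deduplicator script; the filename is assumed, adjust as needed.
script = os.path.abspath("remove_duplicates.py")

with tempfile.TemporaryDirectory() as tmp:
    # Create two files with identical contents plus one unique file.
    for name, data in [("a.txt", b"hello"), ("b.txt", b"hello"), ("c.txt", b"world")]:
        with open(os.path.join(tmp, name), "wb") as fh:
            fh.write(data)
    # Run the script with the temporary directory as its working directory.
    subprocess.run([sys.executable, script], cwd=tmp, check=True)
    # Exactly one of a.txt/b.txt should remain alongside c.txt.
    print(sorted(os.listdir(tmp)))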