photo_hash.py 1.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
  1. from __future__ import division
  2. from PIL import Image
  3. def hash_distance(left_hash, right_hash):
  4. """Compute the hamming distance between two hashes"""
  5. if len(left_hash) != len(right_hash):
  6. raise ValueError('Hamming distance requires two strings of equal length')
  7. return sum(map(lambda x: 0 if x[0] == x[1] else 1, zip(left_hash, right_hash)))
  8. def hashes_are_similar(left_hash, right_hash, tolerance=6):
  9. """
  10. Return True if the hamming distance between
  11. the image hashes are less than the given tolerance.
  12. """
  13. return hash_distance(left_hash, right_hash) <= tolerance
  14. def average_hash(image_path, hash_size=8):
  15. """ Compute the average hash of the given image. """
  16. with open(image_path, 'rb') as f:
  17. # Open the image, resize it and convert it to black & white.
  18. image = Image.open(f).resize((hash_size, hash_size), Image.ANTIALIAS).convert('L')
  19. pixels = list(image.getdata())
  20. avg = sum(pixels) / len(pixels)
  21. # Compute the hash based on each pixels value compared to the average.
  22. bits = "".join(map(lambda pixel: '1' if pixel > avg else '0', pixels))
  23. hashformat = "0{hashlength}x".format(hashlength=hash_size ** 2 // 4)
  24. return int(bits, 2).__format__(hashformat)
  25. def distance(image_path, other_image_path):
  26. """ Compute the hamming distance between two images"""
  27. image_hash = average_hash(image_path)
  28. other_image_hash = average_hash(other_image_path)
  29. return hash_distance(image_hash, other_image_hash)
  30. def is_look_alike(image_path, other_image_path, tolerance=6):
  31. image_hash = average_hash(image_path)
  32. other_image_hash = average_hash(other_image_path)
  33. return hashes_are_similar(image_hash, other_image_hash, tolerance)