12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849 |
- from __future__ import division
- from PIL import Image
def hash_distance(left_hash, right_hash):
    """Count the positions at which two equal-length hashes differ.

    The comparison is element-by-element, so for the hexadecimal strings
    produced by ``average_hash`` this is the number of differing hex
    characters.

    Raises:
        ValueError: if the two hashes do not have the same length.
    """
    if len(left_hash) != len(right_hash):
        raise ValueError('Hamming distance requires two strings of equal length')

    mismatches = 0
    for left_item, right_item in zip(left_hash, right_hash):
        if left_item == right_item:
            continue
        mismatches += 1
    return mismatches
def hashes_are_similar(left_hash, right_hash, tolerance=6):
    """Tell whether two hashes are close enough to be considered similar.

    Returns True when the distance reported by ``hash_distance`` is at most
    ``tolerance`` (the bound is inclusive), False otherwise.
    """
    mismatches = hash_distance(left_hash, right_hash)
    return mismatches <= tolerance
def average_hash(image_path, hash_size=8):
    """Compute the average hash of the image stored at ``image_path``.

    The image is shrunk to ``hash_size`` x ``hash_size`` pixels and converted
    to greyscale. Each pixel then contributes one bit: 1 if it is brighter
    than the mean of all pixels, 0 otherwise. The bits are returned as a
    zero-padded lowercase hexadecimal string.

    Args:
        image_path: path of the image file to hash.
        hash_size: edge length of the reduced image; the hash carries
            ``hash_size ** 2`` bits.

    Returns:
        A hex string whose length is the same for every image hashed with
        the same ``hash_size``.

    Raises:
        ValueError: if ``hash_size`` is smaller than 1.
    """
    if hash_size < 1:
        raise ValueError('hash_size must be at least 1')

    # Image.ANTIALIAS was an alias of LANCZOS and no longer exists in
    # Pillow 10+; prefer LANCZOS and only fall back on very old versions.
    resample = getattr(Image, 'LANCZOS', None)
    if resample is None:
        resample = Image.ANTIALIAS

    with open(image_path, 'rb') as f:
        # Open the image, resize it and convert it to black & white.
        image = Image.open(f).resize((hash_size, hash_size), resample).convert('L')
        pixels = list(image.getdata())

    avg = sum(pixels) / len(pixels)

    # Compute the hash based on each pixel's value compared to the average.
    bits = "".join('1' if pixel > avg else '0' for pixel in pixels)

    # One hex digit holds 4 bits. Round the width up so that bit counts not
    # divisible by 4 (odd hash_size) still give fixed-length hashes.
    hex_width = (hash_size ** 2 + 3) // 4
    hashformat = "0{hashlength}x".format(hashlength=hex_width)
    return format(int(bits, 2), hashformat)
def distance(image_path, other_image_path):
    """Return the hash distance between the images at the two given paths.

    Both files are hashed with ``average_hash`` using its default hash size,
    and the resulting hashes are compared with ``hash_distance``.
    """
    return hash_distance(
        average_hash(image_path),
        average_hash(other_image_path),
    )
def is_look_alike(image_path, other_image_path, tolerance=6):
    """Tell whether the images at the two given paths look alike.

    Both files are hashed with ``average_hash`` using its default hash size,
    and the verdict is whatever ``hashes_are_similar`` reports for those
    hashes and the given ``tolerance``.
    """
    return hashes_are_similar(
        average_hash(image_path),
        average_hash(other_image_path),
        tolerance,
    )
|