123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263 |
- #!/usr/bin/env python
- import sys
- import os
- import math
- from termcolor import colored
# Hash prefix length to check for collisions: the first command-line
# argument when one is given, otherwise 7.
trunc = int(sys.argv[1]) if len(sys.argv) > 1 else 7
def birthday(people, days):
    """Return the chance that at least two of ``people`` share a birthday.

    Birthdays are taken to be uniformly distributed over a year with a
    certain number of ``days``.

    The probability that all birthdays are distinct is the product of
    ``1 - i / days`` for ``i`` in ``1 .. people - 1``.  It is accumulated in
    log space so that very small collision probabilities are not rounded
    to zero by ``1 - x`` cancellation, and so that no huge integer
    products have to be built.

    Args:
        people: Number of people (items) drawn.
        days: Number of equally likely birthdays (slots).

    Returns:
        The collision probability as a float in ``[0.0, 1.0]``.
    """
    # Zero or one person can never collide.
    if people <= 1:
        return 0.0
    # Pigeonhole principle: more people than days forces a collision.
    # This also keeps log1p() away from arguments <= -1 below.
    if people > days:
        return 1.0
    log_all_distinct = math.fsum(
        math.log1p(-i / days) for i in range(1, people)
    )
    # 1 - exp(x) computed accurately as -expm1(x); subtracting from 0.0
    # avoids returning a negative zero.
    return 0.0 - math.expm1(log_all_distinct)
def birthday_threshold(threshold, days):
    """Return how many people are needed for the collision probability
    to reach ``threshold``, given as a fraction of 1.

    Uses the usual birthday-problem approximation
    ``p ~= 1 - exp(-n * (n - 1) / (2 * days))`` solved for ``n``:
    ``n = (1 + sqrt(1 + 8 * days * ln(1 / (1 - p)))) / 2``.
    The result is rounded *up*, because a fractional person count only
    reaches the threshold at the next whole person.

    Args:
        threshold: Target probability, ``0 <= threshold < 1``.
        days: Number of equally likely birthdays (slots).

    Returns:
        The approximate number of people, as an int.

    Raises:
        ValueError: If ``threshold`` is outside ``[0, 1)``.
    """
    if not 0 <= threshold < 1:
        raise ValueError("threshold must satisfy 0 <= threshold < 1")
    # -log1p(-p) equals log(1 / (1 - p)) but stays accurate for tiny p.
    log_term = -math.log1p(-threshold)
    return math.ceil((1 + math.sqrt(8 * days * log_term + 1)) / 2)
def max_hashes(trunc):
    """Return the number of distinct hexadecimal strings of length ``trunc``."""
    hex_digit_values = 16
    return pow(hex_digit_values, trunc)
# Ask git for one line per commit: "<full hash> <ISO author date> <subject>".
# The pipe is closed via the context manager.  Empty lines are dropped so
# that empty output (git printed nothing, e.g. no commits or not a
# repository) yields an empty list instead of [""], which used to crash the
# field parsing below with an IndexError.
with os.popen('git log --pretty=format:"%H %aI %s"') as git_log:
    commits = [line for line in git_log.read().split("\n") if line]

# Split each line once; the subject is the third field and may contain spaces.
commit_fields = [line.split(" ", 2) for line in commits]
hashes = [fields[0] for fields in commit_fields]
dates = [fields[1] for fields in commit_fields]
subjects = [fields[2] for fields in commit_fields]

# Hash prefixes of the requested truncation length.
trunc_hashes = [full_hash[0:trunc] for full_hash in hashes]

# Map every truncated hash to the indices of the commits that share it.
dict_hashes = {}
for position, prefix in enumerate(trunc_hashes):
    dict_hashes.setdefault(prefix, []).append(position)

# Keep only the prefixes that occur on more than one commit.
dict_duplicates = {
    prefix: positions
    for prefix, positions in dict_hashes.items()
    if len(positions) > 1
}
def printmatch(string, length):
    """Print ``string`` with its first ``length`` characters coloured red."""
    highlighted = colored(string[:length], "red")
    remainder = string[length:]
    print(highlighted, remainder, sep="")
# Headline: how many truncated hashes are shared by more than one commit.
print("For truncation length", colored(trunc, "red"), "there are", end=" ")
if not dict_duplicates:
    print(colored("no", "yellow"), "commits with the same hash!")
else:
    duplicate_count = len(dict_duplicates)
    separator = "-" * 40
    print(colored("{:d}".format(duplicate_count), "cyan"), "commits pairs with the same hash! They are:\n")
    # One group per colliding prefix: subject, date and full hash of each
    # commit, with the shared prefix highlighted by printmatch().
    for indices in dict_duplicates.values():
        for idx in indices:
            print(separator)
            print(colored(subjects[idx], "blue"))
            print(colored(dates[idx], "yellow"))
            printmatch(hashes[idx], trunc)
        print(separator)
        print("")
    # NOTE(review): the source lost its indentation; this summary line is
    # assumed to belong to the "duplicates found" branch -- confirm.
    print("There are", duplicate_count, "collisions in total.")

# Birthday-problem statistics for the current number of commits.
print("The probability of a collision happening at least once is", end=" ")
hash_space = max_hashes(trunc)
print(colored("{:.2f}%".format(birthday(len(commits), hash_space) * 100), "green"))
for lead_in, level in (
    ("For this probability to reach 1%, you'd need", 0.01),
    ("For this probability to reach 10%, you'd need", 0.1),
    ("For this probability to reach 50%, you'd need", 0.50),
):
    print(lead_in, colored("{:d}".format(birthday_threshold(level, hash_space)), "cyan"), "commits.")
|