git-birthday 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. #!/usr/bin/env python
  2. import sys
  3. import os
  4. import math
  5. from termcolor import colored
  6. if len(sys.argv) > 1:
  7. trunc = int(sys.argv[1])
  8. else:
  9. trunc = 7
  10. def birthday(people,days):
  11. """The chance of a number of people sharing a birthday
  12. in a year with a certian number of days."""
  13. return 1 - math.prod([days - i for i in range(0,people)]) / ( days ** people )
  14. def birthday_threshold(threshold,days):
  15. """How many people you need for the collision probability
  16. to reach the threshold, as a fraction of 1."""
  17. return int(1/2 * (1 + math.sqrt(4*(2 * days * math.log(1/(1 - threshold))) + 1)))
  18. def max_hashes(trunc):
  19. return 16 ** trunc
  20. commits = os.popen('git log --pretty=format:"%H %aI %s"').read().split("\n")
  21. hashes = [s.split()[0] for s in commits]
  22. dates = [s.split()[1] for s in commits]
  23. subjects = [s.split(" ", 2)[2] for s in commits]
  24. trunc_hashes = [s[0:trunc] for s in hashes]
  25. dict_hashes = {}
  26. for h in enumerate(trunc_hashes):
  27. if h[1] not in dict_hashes:
  28. dict_hashes[h[1]]=[h[0]]
  29. else:
  30. dict_hashes[h[1]].append(h[0])
  31. dict_duplicates = dict(filter(lambda elem: len(elem[1]) > 1,dict_hashes.items()))
  32. def printmatch(string,length):
  33. print(colored(string[0:length], "red") , string[length:],sep="")
  34. print ("For truncation length", colored(trunc,"red") ,"there are", end=" ")
  35. if len(dict_duplicates) == 0:
  36. print(colored("no","yellow"),"commits with the same hash!")
  37. else:
  38. print(colored("{:d}".format(len(dict_duplicates)),"cyan"), "commits pairs with the same hash! They are:\n")
  39. for el in dict_duplicates.items():
  40. for idx in el[1]:
  41. print("-"*40)
  42. print(colored(subjects[idx],"blue"))
  43. print(colored(dates[idx],"yellow"))
  44. printmatch(hashes[idx],trunc)
  45. print("-"*40)
  46. print("")
  47. print("There are", len(dict_duplicates), "collisions in total.")
  48. print("The probability of a collision happening at least once is", end=" ")
  49. print(colored("{:.2f}%".format(birthday(len(commits),max_hashes(trunc)) * 100), "green"))
  50. print("For this probability to reach 1%, you'd need", colored("{:d}".format(birthday_threshold(0.01,max_hashes(trunc))),"cyan"), "commits.")
  51. print("For this probability to reach 10%, you'd need", colored("{:d}".format(birthday_threshold(0.1,max_hashes(trunc))),"cyan"), "commits.")
  52. print("For this probability to reach 50%, you'd need", colored("{:d}".format(birthday_threshold(0.50,max_hashes(trunc))),"cyan"), "commits.")