matching_with_mismatches.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
  1. # python3
  2. import sys
  3. import random
  4. import fileinput
  5. ## fro preComputeHash and getHashValue function refer to the 4th program - substring equality
  6. ## compute the hash values for all the combinations in the given string
  7. def preComputeHash(string, m, x):
  8. string_length = len(string)
  9. hash_values = [0]
  10. for i in range(1, string_length + 1):
  11. # subtracting the ASCII code of 'a' to maintain the value of the strings within 0 to 26
  12. val = ((x * hash_values[i - 1]) % m + (ord(string[i - 1]) - ord('a')) % m) % m
  13. hash_values.append(val)
  14. return hash_values
  15. ## return the hash value using the precomputed hash values of the given index and length
  16. def getHashValue(hash_values, index_start, length, x, m, power_multiplier):
  17. return (hash_values[index_start + length] % m - (power_multiplier * hash_values[index_start]) % m) % m
  18. """
  19. Precompute power of X with module of M1 and M2. It can buy you extra time when you try
  20. to compute hash of substring. Otherwise, it will cost you extra O(log(n)) for pow(X, n, M).
  21. """
  22. def getPower(x,length,m):
  23. pow_lst = []
  24. for i in range(length+1):
  25. pow_lst.append(pow(x,i,m))
  26. return pow_lst
  27. class Mismatches:
  28. def __init__(self, text, pattern):
  29. self.text = text
  30. self.pattern = pattern
  31. self.m1 = pow(10, 15) + 7
  32. self.m2 = pow(10, 15) + 456
  33. self.x = 26
  34. self.pattern_length = len(pattern)
  35. self.hash_values_t1 = preComputeHash(self.text, self.m1, self.x)
  36. self.hash_values_t2 = preComputeHash(self.text, self.m2, self.x)
  37. self.hash_values_p1 = preComputeHash(self.pattern, self.m1, self.x)
  38. self.hash_values_p2 = preComputeHash(self.pattern, self.m2, self.x)
  39. self.power_lst_m1 = getPower(self.x, self.pattern_length, self.m1)
  40. self.power_lst_m2 = getPower(self.x, self.pattern_length, self.m2)
  41. def solve(self, k, text, pattern):
  42. pattern_length = len(pattern)
  43. text_length = len(text)
  44. result = []
  45. for i in range(text_length-pattern_length+1):
  46. value = self.checkMatches(text[i:i+pattern_length], pattern, k, i, i+pattern_length, 0, pattern_length)
  47. if value <= k:
  48. result.append(i)
  49. return result
  50. def checkMatches(self, subtext, pattern, k, start_index_t, end_index_t, start_index_p, end_index_p):
  51. mismatch_count = 0
  52. left = 0
  53. right = len(pattern)
  54. mid = left + (right - left)//2
  55. #print(" ",subtext, pattern, start_index_t, end_index_t, start_index_p, end_index_p) #left, right, mid, mismatch_count)
  56. if mid == 0:
  57. if ord(subtext) != ord(pattern):
  58. return 1
  59. else:
  60. return 0
  61. if self.checkHash(start_index_t, end_index_t, start_index_p, end_index_p):
  62. #mismatch_count = mismatch_count + self.checkMatches(subtext[mid:right], pattern[mid:right], k)
  63. return mismatch_count
  64. else:
  65. mid_index_t = (start_index_t + end_index_t)//2 ## uses this index to get the has values of the pattern from the original text
  66. mid_index_p = (start_index_p + end_index_p)//2 ## keep track of the index for the pattern to get the hash values
  67. mismatch_count = self.checkMatches(subtext[left:mid], pattern[left:mid], k, start_index_t, mid_index_t, start_index_p, mid_index_p) + \
  68. self.checkMatches(subtext[mid:right], pattern[mid:right], k, mid_index_t, end_index_t, mid_index_p, end_index_p)
  69. return mismatch_count
  70. #return mismatch_count
  71. def checkHash(self, index_start_t, index_end_t, index_start_p, index_end_p):
  72. length = index_end_t - index_start_t
  73. hash_value_st1 = getHashValue(self.hash_values_t1, index_start_t, length, self.x, self.m1, self.power_lst_m1[length])
  74. hash_value_st2 = getHashValue(self.hash_values_t2, index_start_t, length, self.x, self.m2, self.power_lst_m2[length])
  75. hash_value_sp1 = getHashValue(self.hash_values_p1, index_start_p, length, self.x, self.m1, self.power_lst_m1[length])
  76. hash_value_sp2 = getHashValue(self.hash_values_p2, index_start_p, length, self.x, self.m2, self.power_lst_m2[length])
  77. return (hash_value_st1 == hash_value_sp1 and hash_value_st2 == hash_value_sp2)
  78. for line in fileinput.input():
  79. if line != '\n':
  80. k, t, p = line.split()
  81. process = Mismatches(t,p)
  82. ans = process.solve(int(k), t, p)
  83. print(len(ans), *ans)