xor.py 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337
  1. entry = 0x7FC66B580010
  2. head = 0x5586267F2080
  3. tail = 0x55862759E700
  4. bin(head ^ tail)
  5. hex(head ^ tail)
  6. (head ^ tail) ^ head == tail
  7. (head ^ tail) ^ tail == head
  8. # force odd
  9. for n in range(100):
  10. n |= 1
  11. print(n)
  12. bin(11400714819323198486)
  13. bin(11400714819323198485)
  14. bin(11400714819323198485 ^ 11400714819323198486)
  15. bin(11400714819323198486 ^ 10)
  16. 11400714819323198486 & 1
  17. 11400714819323198485
  18. import math
  19. int(2 ** 16 / ((1 + math.sqrt(5)) / 2))
  20. int(2 ** 32 / ((1 + math.sqrt(5)) / 2))
  21. int(2 ** 64 / ((1 + math.sqrt(5)) / 2))
  22. int(2 ** 128 / ((1 + math.sqrt(5)) / 2))
  23. # fib hash
  24. for key in range(100):
  25. (key * 11400714819323198485) >> 54
  26. fimur_hash = [
  27. 8613972110907996638,
  28. 15358817489599691552,
  29. 3656918988786498657,
  30. 10401764430870904227,
  31. 17146609988017651429,
  32. 5444711761147035686,
  33. 12189556917607727464,
  34. 487658412734448298,
  35. 7232503855452193771,
  36. 13977349419385324845,
  37. 2275450786074068591,
  38. 9020296620023908272,
  39. 15765141844109363442,
  40. 4063243553980254771,
  41. 10808088789751040885,
  42. 17552934303797120183,
  43. 5851035703310487032,
  44. 12595881237862518586,
  45. 893982664156516476,
  46. 14383673675207217919,
  47. 9426620913003140482,
  48. 16171466353514682052,
  49. 4469567715245758469,
  50. 11214413148748618055,
  51. ]
  52. murmur3_x64 = [
  53. 1584096894626881991315877671151210123721,
  54. 54336250343350542782176903032023148998,
  55. 855158039471493497310317094450549862077,
  56. 1039127549861262650713404564393879311435,
  57. 93136154732261704556933925320311062615,
  58. 74312781558375054611426997378294798738,
  59. 1807475194205470988210194190753005636218,
  60. 1619970959750794755710948967737182030885,
  61. 128772317988171237053664783826596130169,
  62. 1550580235102978885114018615696532709555,
  63. 1154421219551708742117692178646737305277,
  64. 70824873152156146165017857262847459992,
  65. 86718167958849630914322687166258693395,
  66. 5145779186172661644318506030192103308,
  67. 1112946414600426880511747594572229236485,
  68. 82534992099906578798463907845345498967,
  69. 413297339436388227811327427340427008806,
  70. 1814992898938886923516075192548260744819,
  71. 540449371664412957912350958338390321963,
  72. 38622926961033010357625350269841354667,
  73. 56363050618590338737050388589839385361,
  74. 86807990082632946749662700404111625938,
  75. 162281206466706151118429615108459332167,
  76. 170811608674138578702126145761342232142,
  77. ]
  78. # for h in fimurhash:
  79. # bin(h)
  80. def hamming(a, b, bitdepth):
  81. distance = 0
  82. for i in range(bitdepth - 1, -1, -1):
  83. bit_a = (a >> i) & 1
  84. bit_b = (b >> i) & 1
  85. distance += not(bit_a == bit_b)
  86. return distance
  87. for i, hash in enumerate(fimur_hash):
  88. distance = hamming(hash, fimur_hash[i-1], 64)
  89. print(f"distance [{i}]<->[{i-1}] \t {distance}/64")
  90. for i, hash in enumerate(murmur3_x64):
  91. distance = hamming(hash, murmur3_x64[i-1], 128)
  92. print(f"distance [{i}]<->[{i-1}] \t {distance}/128")
  93. # fimurhash
  94. # distance [0]<->[-1] =28/64
  95. # distance [1]<->[0] =35/64
  96. # distance [2]<->[1] =31/64
  97. # distance [3]<->[2] =26/64
  98. # distance [4]<->[3] =33/64
  99. # distance [5]<->[4] =37/64
  100. # distance [6]<->[5] =24/64
  101. # distance [7]<->[6] =34/64
  102. # distance [8]<->[7] =27/64
  103. # distance [9]<->[8] =28/64
  104. # distance [10]<->[9] =32/64
  105. # distance [11]<->[10] =32/64
  106. # distance [12]<->[11] =33/64
  107. # distance [13]<->[12] =23/64
  108. # distance [14]<->[13] =28/64
  109. # distance [15]<->[14] =29/64
  110. # distance [16]<->[15] =27/64
  111. # distance [17]<->[16] =34/64
  112. # distance [18]<->[17] =32/64
  113. # distance [19]<->[18] =29/64
  114. # distance [20]<->[19] =29/64
  115. # distance [21]<->[20] =31/64
  116. # distance [22]<->[21] =32/64
  117. # distance [23]<->[22] =24/64
  118. # murmur3_x64
  119. # distance [0]<->[-1] =67/128
  120. # distance [1]<->[0] =55/128
  121. # distance [2]<->[1] =66/128
  122. # distance [3]<->[2] =62/128
  123. # distance [4]<->[3] =59/128
  124. # distance [5]<->[4] =63/128
  125. # distance [6]<->[5] =66/128
  126. # distance [7]<->[6] =62/128
  127. # distance [8]<->[7] =65/128
  128. # distance [9]<->[8] =64/128
  129. # distance [10]<->[9] =70/128
  130. # distance [11]<->[10] =65/128
  131. # distance [12]<->[11] =66/128
  132. # distance [13]<->[12] =67/128
  133. # distance [14]<->[13] =59/128
  134. # distance [15]<->[14] =55/128
  135. # distance [16]<->[15] =67/128
  136. # distance [17]<->[16] =64/128
  137. # distance [18]<->[17] =65/128
  138. # distance [19]<->[18] =61/128
  139. # distance [20]<->[19] =66/128
  140. # distance [21]<->[20] =58/128
  141. # distance [22]<->[21] =63/128
  142. # distance [23]<->[22] =65/128
  143. def fimurhash(number):
  144. hash = (31 * 11400714819323198485 * (number << 15)) & 0xFFFFFFFFFFFFFFFF
  145. hash = ((hash << 7) | (hash >> (32 - 7)) ) & 0xFFFFFFFFFFFFFFFF
  146. return hash
  147. def fimurhalt(number):
  148. hash = (31 * 102334155 * (number << 15)) & 0xFFFFFFFFFFFFFFFF
  149. hash = ((hash << 7) | (hash >> (32 - 7)) ) & 0xFFFFFFFFFFFFFFFF
  150. return hash
  151. mean = 0
  152. pool = 10000
  153. for i in range(pool):
  154. hash_a = fimurhash(i)
  155. hash_b = fimurhash(i + 1)
  156. distance = hamming(hash_a, hash_b, 64)
  157. mean += distance/64
  158. print(f"fimurhash [{i}]<->[{i+1}] \t {distance/64} \t {(hash_a & 0xFFFFFFFFFFFFFFFF)>> (64 - 14)} ")
  159. print(f"mean = {mean/pool}")
  160. for i in range(0, 544, 34):
  161. hash_a = fimurhash(i)
  162. hash_b = fimurhash(i + 1)
  163. distance = hamming(hash_a, hash_b, 64)
  164. print(f"fimurhash [{i}]<->[{i+1}] \t {distance}/64 = {distance/64}")
  165. mean = 0
  166. pool = 10000
  167. for i in range(pool):
  168. hash_a = fimurhalt(i)
  169. hash_b = fimurhalt(i + 1)
  170. distance = hamming(hash_a, hash_b, 64)
  171. mean += distance/64
  172. print(f"fimurhalt [{i}]<->[{i+1}] \t {distance/64} \t {(hash_a & 0xFFFFFFFFFFFFFFFF)>> (64 - 14)} ")
  173. print(f"mean = {mean/pool}")
  174. for i in range(0, 544, 34):
  175. hash_a = fimurhalt(i)
  176. hash_b = fimurhalt(i + 1)
  177. distance = hamming(hash_a, hash_b, 64)
  178. print(f"fimurhalt [{i}]<->[{i+1}] \t {distance}/64 = {distance/64}")
  179. pool = 1000
  180. for i in range(pool):
  181. print(f"{fimurhash(i) >> 54} \t: "
  182. f"{fimurhash(fimurhash(i >> 54) + i) >> 54} \t: "
  183. f"{fimurhash(fimurhash(i >> 54) ^ i) >> 54}")
  184. # for i in range(pool):
  185. # print(f"{fimurhash(i)} \t: "
  186. # f"{fimurhash(fimurhash(i) + i)} \t: "
  187. # f"{fimurhash(fimurhash(i) ^ i)}")
  188. def fimurhash_seed(number, seed):
  189. hash = (seed * 11400714819323198485 * (number << 15)) & 0xFFFFFFFFFFFFFFFF
  190. hash = ((hash << 7) | (hash >> (32 - 7)) ) & 0xFFFFFFFFFFFFFFFF
  191. return hash
  192. # for i in range(0, 545, 34):
  193. # print(f"{i} ; "
  194. # f"{fimurhash_seed(i, 31) >> 54} ; "
  195. # f"{fimurhash_seed(i, 31 + fimurhash_seed(i, 31)) >> 54} ; "
  196. # f"{fimurhash_seed(i, 31 ^ fimurhash_seed(i, 31)) >> 54}")
  197. # nested hash
  198. n = 14
  199. top_n = 0
  200. seed = 31 << 7
  201. collisions = 0
  202. entry_count = (2**(n-1))
  203. hmap = [[] for n in range(2**top_n)]
  204. for i in range(1, entry_count):
  205. hash = fimurhash_seed(i, seed)
  206. top_level = hash >> (64 - top_n)
  207. # nested_seed =fimurhash_seed(i ^ hash, seed + hash) >> (64 - n + top_n)
  208. # nested_hash = fimurhash_seed(i, (seed * hash) & 0xFFFFFFFFFFFFFFFF )
  209. # nested_hash = fimurhash_seed(i, seed << 7)
  210. nested_hash = fimurhash_seed(i, seed)
  211. nested_level = nested_hash >> (64 - n + top_n)
  212. print(f"{i} : { hash } : { nested_hash } : {top_level} : {nested_level}")
  213. hmap[top_level].append(nested_level)
  214. for i, top_level in enumerate(hmap):
  215. entries = len(top_level)
  216. unique = len(set(top_level))
  217. collisions += entries-unique
  218. print(f"toplevel[{i}] -> {unique}/{entries} : {entries-unique} collisions")
  219. print(f"{collisions} / {entry_count} : {collisions/entry_count}")
  220. # for i in range(100):
  221. # print(f"{i} : { fimurhash_seed(i, 31) >> 54}")
  222. # def fimurhash(number):
  223. # hash = 31 * 11400714819323198486 * (number << 15)
  224. # hash = (hash << 7) | (hash >> (32 - 7))
  225. # return hash
  226. # def fimurhash(number):
  227. # hash = 31 * 62831853071 * (number << 15)
  228. # hash = (hash << 7) | (hash >> (32 - 7))
  229. # return hash
  230. # def fimurhash(number):
  231. # hash = 31 * 102334155 * (number << 15)
  232. # hash = (hash << 7) | (hash >> (32 - 7))
  233. # return hash
  234. # def fimurhash(number):
  235. # hash = 31 * 102334155 * (number << 16)
  236. # hash = (hash << 8) | (hash >> (32 - 8))
  237. # return hash
  238. # def fimurhash(number):
  239. # hash = 31 * 11400714819323198486 * (number << 16)
  240. # hash = (hash << 8) | (hash >> (32 - 8))
  241. # return hash
  242. # def fimurhash64(number):
  243. # hash = 31 * 11400714819323198485 * (number << 31)
  244. # hash = (hash << 15) | (hash >> (64 - 19))
  245. # return hash
  246. # def merxorhash64(number, n):
  247. # hash = 31 * ((2**n)-1 ) * (number << 31)
  248. # hash = (hash << 15) | (hash >> (64 - 19))
  249. # return hash
  250. # import string
  251. # for char in string.ascii_lowercase:
  252. # mean = 0
  253. # for i in range(pool):
  254. # hash_a = fimurhash64(i)
  255. # hash_b = fimurhash64(i + 1)
  256. # distance = hamming(hash_a, hash_b, 64)
  257. # mean += distance/64
  258. # print(f"fimurhash64 [{i}]<->[{i+1}] \t {distance}/64 = {distance/64}")
  259. # print(f"mean = {mean/pool}")
  260. # mean = 0
  261. # for i in range(pool):
  262. # hash_a = merxorhash64(i, 61)
  263. # hash_b = merxorhash64(i + 1, 61)
  264. # distance = hamming(hash_a, hash_b, 64)
  265. # mean += distance/64
  266. # print(f"merxorhash64 [{i}]<->[{i+1}] \t {distance}/64 = {distance/64}")
  267. # print(f"mean = {mean/pool}")
  268. # mean = 0
  269. # for i in range(pool):
  270. # hash_a = merxorhash64(i, 31)
  271. # hash_b = merxorhash64(i + 1, 31)
  272. # distance = hamming(hash_a, hash_b, 64)
  273. # mean += distance/64
  274. # print(f"merxorhash64 [{i}]<->[{i+1}] \t {distance}/64 = {distance/64}")
  275. # print(f"mean = {mean/pool}")