# acquisitions.py
# author: Kevin Miller
import numpy as np
from scipy.stats import norm
from .al_util import *  # provides jac_calc, jac_calc2, hess_calc, hess_calc2 used below -- TODO confirm full export list
import time

# Model identifiers accepted by the acquisition functions in this module.
MODELS = ['gr', 'probit-log', 'probit-norm', 'ce', 'log', 'probitnorm', 'mgr']
  7. def sgn(x):
  8. if x >= 0:
  9. return 1.
  10. else:
  11. return -1.
  12. def mc_full(Cand, m, C, modelname, gamma=0.1):
  13. if modelname not in MODELS:
  14. raise ValueError("%s is not a valid model name, must be in %s" % (modelname, MODELS))
  15. if len(m.shape) > 1: # Multiclass case
  16. if modelname == 'gr':
  17. #return np.array([ np.sqrt(np.inner(m[k,:], m[k,:])[0,0] + 1. - 2.*np.max(m[k,:])) * np.linalg.norm(C[:,k])/(gamma**2. + C[k,k]) for k in Cand])
  18. m_minus_yk = m[Cand,:]
  19. m_minus_yk.ravel()[[m.shape[1]*i for i in range(m_minus_yk.shape[0])] + np.argmax(m_minus_yk, axis=1)] -= 1.
  20. m_minus_yk_norms = np.linalg.norm(m_minus_yk, axis=1)
  21. Ck_norms = np.linalg.norm(C[:, Cand], axis=0)
  22. return Ck_norms*m_minus_yk_norms/(gamma**2. + np.diag(C)[Cand])
  23. #return np.array([np.sqrt(np.inner(m[k,:], m[k,:]) + 1. - 2.*np.max(m[k,:])) * np.linalg.norm(C[:,k])/(gamma**2. + C[k,k]) for k in Cand])
  24. else:
  25. raise NotImplementedError("Have not implemented full storage model change calculation for multiclass besides Gaussian Regression ('gr')")
  26. else:
  27. if modelname == 'probit-log' or modelname == 'log':
  28. return np.array([np.absolute(jac_calc2(m[k], sgn(m[k]), gamma))/(1. + C[k,k]*hess_calc2(m[k], sgn(m[k]), gamma)) \
  29. * np.linalg.norm(C[:,k]) for k in Cand])
  30. elif modelname == 'probit-norm' or modelname == 'probitnorm':
  31. return np.array([np.absolute(jac_calc(m[k], sgn(m[k]), gamma))/(1. + C[k,k]*hess_calc(m[k], sgn(m[k]), gamma)) \
  32. * np.linalg.norm(C[:,k]) for k in Cand])
  33. else:
  34. return np.array([np.absolute(m[k] - sgn(m[k]))/(gamma**2. + C[k,k]) * np.linalg.norm(C[:,k]) for k in Cand])
def mc_reduced(C_a, alpha, v_Cand, modelname, uks=None, gamma=0.1, verbose=False, greedy=True):
    """Model-change acquisition values in the reduced (spectral/truncated) setting.

    Parameters
    ----------
    C_a : ndarray
        Reduced covariance matrix in the eigenbasis. For 'ce' it has the
        class-blocked shape (M*nc, M*nc); otherwise (M, M).
    alpha : ndarray
        Reduced MAP coefficients: flattened (nc*M,) for 'ce', (M,) for binary
        models, (M, nc) for multiclass GR.
    v_Cand : ndarray
        Eigenvector rows of the candidate points, shape (num_cand, M).
    modelname : str
        One of MODELS.
    uks : ndarray, optional
        Precomputed MAP values at the candidates; computed from alpha when None.
    gamma : float, optional
        Model noise parameter.
    verbose : bool, optional
        For 'ce' with greedy=False: print when the norm-minimizing label
        differs from the most-probable (argmax) label.
    greedy : bool, optional
        For 'ce': if True, evaluate only the pseudo-label (argmax of the
        softmax); otherwise minimize over all nc candidate labelings.

    Returns
    -------
    ndarray
        One acquisition value per candidate row of v_Cand.

    Raises
    ------
    ValueError
        If modelname is not in MODELS.
    """
    if modelname not in MODELS:
        raise ValueError("%s is not a valid model name, must be in %s" % (modelname, MODELS))
    if modelname == 'ce':
        num_cand, M = v_Cand.shape
        nc = alpha.shape[0]//M  # number of classes
        if uks is None:
            uks = v_Cand @ (alpha.reshape(nc, M).T)
        # Softmax probabilities at each candidate (rows sum to 1).
        piks = np.exp(uks/gamma)
        piks /= np.sum(piks, axis=1)[:,np.newaxis]
        # Per-class blocks of C_a V_Cand^T, laid out side by side:
        # columns [c*num_cand : (c+1)*num_cand] belong to class c.
        C_aV_candT = np.empty((M*nc, num_cand*nc))
        for c in range(nc):
            C_aV_candT[:,c*num_cand:(c+1)*num_cand] = C_a[:,c*M:(c+1)*M] @ v_Cand.T
        mc_vals = []
        for k in range(num_cand):
            # Columns of C_aV_candT for candidate k, one per class.
            inds_k_in_cand = [k + c*num_cand for c in range(nc)]
            CVT_k = C_aV_candT[:, inds_k_in_cand]
            # Softmax Hessian-like term at k: diag(pi_k) - pi_k pi_k^T (symmetric PSD).
            Bk = np.diag(piks[k,:]) - np.outer(piks[k,:], piks[k,:])
            if np.linalg.norm(Bk, ord='fro') > 1e-7:
                # Bk numerically nonzero: factor Bk = Tk Tk^T via its SVD and
                # build the Woodbury-style correction matrix Mk.
                uk, sk, vkt = np.linalg.svd(Bk)
                Tk = uk * np.sqrt(sk)[np.newaxis, :]
                Gk = np.empty((nc, nc))
                for c in range(nc):
                    Gk[c,:] = v_Cand[k,:][np.newaxis,:] @ C_aV_candT[c*M:(c+1)*M, inds_k_in_cand]
                TkGk = Tk @ Gk
                Mk = np.eye(nc) - Tk.T @ np.linalg.inv(gamma*np.eye(nc) + TkGk @ Tk.T) @ TkGk
                if not greedy:
                    # Evaluate the change for every possible one-hot labeling and keep the min.
                    Mkpi_k_yk_mat = CVT_k @ (Mk @ (np.tile(piks[k,:][:,np.newaxis], (1,nc)) - np.eye(nc)))
                    mc_vals_for_k = np.linalg.norm(Mkpi_k_yk_mat, axis=0)
                    argmin_mcvals = np.argmin(mc_vals_for_k)
                    argmax_piks = np.argmax(piks[k,:])
                    if (argmin_mcvals != argmax_piks) and verbose:
                        print("%d (index in Cand) did Not choose choice that we thought" % k)
                    mc_vals.append(np.min(mc_vals_for_k))
                else:
                    # Greedy: use only the pseudo-label y_c (one-hot at the softmax argmax).
                    y_c = np.zeros(nc)
                    y_c[np.argmax(piks[k,:])] = 1.
                    mc_vals.append(np.linalg.norm(CVT_k @ (Mk @ (piks[k,:] - y_c))))
            else:
                # Bk ~ 0 (softmax effectively one-hot): the correction Mk reduces
                # to the identity, so it is dropped from the products below.
                if not greedy:
                    Mkpi_k_yk_mat = CVT_k @ (np.tile(piks[k,:][:,np.newaxis], (1,nc)) - np.eye(nc))
                    mc_vals_for_k = np.linalg.norm(Mkpi_k_yk_mat, axis=0)
                    argmin_mcvals = np.argmin(mc_vals_for_k)
                    argmax_piks = np.argmax(piks[k,:])
                    if (argmin_mcvals != argmax_piks) and verbose:
                        print("%d (index in Cand) did Not choose choice that we thought" % k)
                    mc_vals.append(np.min(mc_vals_for_k))
                else:
                    y_c = np.zeros(nc)
                    y_c[np.argmax(piks[k,:])] = 1.
                    mc_vals.append(np.linalg.norm(CVT_k @ (piks[k,:] - y_c)))
        return np.array(mc_vals)
    else:
        if uks is None: # if have not already calculated MAP estimator on full (as we are doing in our Reduced model), then do this calculation
            uks = v_Cand @ alpha
        C_a_vk = C_a @ (v_Cand.T)
        if modelname == 'probit-log' or modelname == 'log':
            return np.array([np.absolute(jac_calc2(uks[i], sgn(uks[i]),gamma))/(1. + \
                np.inner(v_Cand[i,:], C_a_vk[:,i])*hess_calc2(uks[i], sgn(uks[i]),gamma))* np.linalg.norm(C_a_vk[:,i]) \
                for i in range(v_Cand.shape[0])])
        elif modelname == 'probit-norm' or modelname == 'probitnorm':
            return np.array([np.absolute(jac_calc(uks[i], sgn(uks[i]),gamma))/(1. + \
                np.inner(v_Cand[i,:], C_a_vk[:,i])*hess_calc(uks[i], sgn(uks[i]),gamma))* np.linalg.norm(C_a_vk[:,i]) \
                for i in range(v_Cand.shape[0])])
        else:
            # Multiclass GR
            if len(alpha.shape) > 1:
                uks_minus_yk = uks.copy()
                # Subtract 1 at each row's argmax entry via flat indexing:
                # flat index = row * nc + argmax(row), i.e. uks - one_hot(argmax).
                uks_minus_yk.ravel()[[uks.shape[1]*i for i in range(uks.shape[0])]+ np.argmax(uks, axis=1)] -= 1.
                uks_minus_yk_norms = np.linalg.norm(uks_minus_yk, axis=1)
                C_a_vk_norms = np.linalg.norm(C_a_vk, axis=0)
                return np.array([C_a_vk_norms[i]*uks_minus_yk_norms[i]/(gamma**2. + \
                    np.inner(v_Cand[i,:], C_a_vk[:,i])) for i in range(v_Cand.shape[0])])
            # Binary GR
            else:
                return np.array([np.absolute(uks[i] - sgn(uks[i]))/(gamma**2. + np.inner(v_Cand[i,:], C_a_vk[:,i]))
                    * np.linalg.norm(C_a_vk[:,i]) for i in range(v_Cand.shape[0])])
  125. def mc_avg_reduced(model, Cand, beta=0.0):
  126. assert not model.full_storage
  127. if not model.modelname in ['mgr', 'gr']:
  128. raise ValueError("{} currently only implemented for 'MGR' and 'GR' models..")
  129. v_Cand = model.v[Cand, :]
  130. uks = v_Cand @ model.alpha
  131. C_a_vk = model.C_a @ (v_Cand.T)
  132. # Multiclass GR
  133. if len(model.alpha.shape) > 1:
  134. uks_minus_yk = uks.copy()
  135. uks_minus_yk.ravel()[[uks.shape[1]*i for i in range(uks.shape[0])]+ np.argmax(uks, axis=1)] -= 1.
  136. uks_minus_yk_norms = np.linalg.norm(uks_minus_yk, axis=1)
  137. C_a_vk_norms = np.linalg.norm(C_a_vk, axis=0)
  138. return np.array([C_a_vk_norms[i]**(1. + beta) * (uks_minus_yk_norms[i]**(1. - beta))/(model.gamma**2. + \
  139. np.inner(v_Cand[i,:], C_a_vk[:,i])) for i in range(v_Cand.shape[0])])
  140. # Binary GR
  141. else:
  142. return np.array([np.absolute(uks[i] - sgn(uks[i]))**(1.- beta)/(model.gamma**2. + np.inner(v_Cand[i,:], C_a_vk[:,i]))
  143. * np.linalg.norm(C_a_vk[:,i])**(1.+beta) for i in range(v_Cand.shape[0])])
  144. def mc_app_full_red(model_, Cand):
  145. assert not model_.full_storage
  146. lam_bar_inv = 1./(model_.d[-1] + model_.tau**2.)
  147. N, M = model_.v.shape
  148. v_Cand = model_.v[model_.unlabeled, :].T
  149. C_a_vk = model_.C_a @ v_Cand
  150. C_a_vk_norms = np.linalg.norm(C_a_vk, axis=0)
  151. v_Cand_norms = np.linalg.norm(v_Cand, axis=0)**2. # calculated ahead of time
  152. A = model_.v @ (C_a_vk - lam_bar_inv * v_Cand) # can calculate 2nd half ahead of time
  153. A[model_.unlabeled, range(len(model_.unlabeled))] += lam_bar_inv
  154. A_norms = np.linalg.norm(A, axis=0)
  155. if len(model_.alpha.shape) > 1:
  156. return np.array([ C_a_vk_norms[i]/(model_.gamma**2. + np.inner(v_Cand[:,i], C_a_vk[:,i]) + lam_bar_inv*(1. - v_Cand_norms[i]))
  157. *A_norms[i] for i,k in enumerate(model_.unlabeled)])
  158. else:
  159. return np.array([ np.absolute(model_.m[k] - np.sign(model_.m[k]))/(model_.gamma**2. + np.inner(v_Cand[:,i], C_a_vk[:,i]) + lam_bar_inv*(1. - v_Cand_norms[i]))
  160. *A_norms[i] for i,k in enumerate(model_.unlabeled)])
  161. def mcavg_app_full_red(model_, Cand, beta=0.0):
  162. assert not model_.full_storage
  163. lam_bar_inv = 1./(model_.d[-1] + model_.tau**2.)
  164. N, M = model_.v.shape
  165. v_Cand = model_.v[model_.unlabeled, :].T
  166. C_a_vk = model_.C_a @ v_Cand
  167. C_a_vk_norms = np.linalg.norm(C_a_vk, axis=0)
  168. v_Cand_norms = np.linalg.norm(v_Cand, axis=0)**2. # calculated ahead of time
  169. A = model_.v @ (C_a_vk - lam_bar_inv * v_Cand) # can calculate 2nd half ahead of time
  170. A[model_.unlabeled, range(len(model_.unlabeled))] += lam_bar_inv
  171. A_norms = np.linalg.norm(A, axis=0)
  172. if len(model_.alpha.shape) > 1:
  173. return np.array([ C_a_vk_norms[i]**(1.-beta)/(model_.gamma**2. + np.inner(v_Cand[:,i], C_a_vk[:,i]) + lam_bar_inv*(1. - v_Cand_norms[i]))
  174. *A_norms[i]**(1.+beta) for i,k in enumerate(model_.unlabeled)])
  175. else:
  176. return np.array([ np.absolute(model_.m[k] - np.sign(model_.m[k]))**(1.-beta)/(model_.gamma**2. + np.inner(v_Cand[:,i], C_a_vk[:,i]) + lam_bar_inv*(1. - v_Cand_norms[i]))
  177. *A_norms[i]**(1.+beta) for i,k in enumerate(model_.unlabeled)])