gbssl.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568
  1. '''
  2. Graph Based SSL model class file -- TODO: comment the classes
  3. '''
  4. from .al_util import *
  5. VALID_MODELS = ['gr', 'probit-log', 'probit-norm', 'log', 'probitnorm', 'mgr']
  6. class BinaryGraphBasedSSLModel(object):
  7. '''
  8. Implements full storage of C_tau, C.
  9. * Can have truncated eigenvalues and eigenvectors
  10. Get rid of storing eigenvectors and eigenvalues?? (Since we're already committing to storing C_tau and C fully)
  11. '''
  12. def __init__(self, modelname, gamma, tau, v=None, w=None, Ct=None, t=None):
  13. self.gamma = gamma
  14. self.tau = tau
  15. if v is None:
  16. raise ValueError("Need to provide the eigenvectors in the variable 'v'")
  17. if w is None:
  18. raise ValueError("Need to provide the eigenvalues in the variable 'w'")
  19. self.v = v
  20. if self.v.shape[0] != self.v.shape[1]:
  21. self.trunc = True
  22. else:
  23. self.trunc = False
  24. self.w = w
  25. if t is not None: # heat kernel option, self.d is the diagonal of Lambda (evals) matrix
  26. assert type(t) in [int, float], "Parameter 't' must either be of type int or float"
  27. self.d = np.exp(t*self.w)
  28. else:
  29. #self.d = (self.tau ** (-2.)) * ((self.w + self.tau**2.))
  30. self.d = self.w + self.tau**2.
  31. if Ct is not None:
  32. self.Ct = Ct
  33. else:
  34. self.Ct = (self.v * (1./self.d)) @ self.v.T
  35. if modelname not in VALID_MODELS:
  36. raise ValueError("%s is not a valid modelname, must be in %s" % (modelname, str(VALID_MODELS)))
  37. self.modelname = modelname
  38. self.full_storage = True
  39. self.m = None
  40. self.C = None
  41. return
  42. def calculate_model(self, labeled, y):
  43. if self.modelname == 'gr':
  44. C_a = sp.linalg.inv(np.diag(self.d) + self.v[labeled,:].T @ self.v[labeled,:] / (self.gamma**2.))
  45. self.C = self.v @ C_a @ self.v.T
  46. self.m = self.C[:,labeled] @ np.array(y)/(self.gamma**2.)
  47. else:
  48. self.m = self.get_m(labeled, y)
  49. self.C = self.get_C(labeled, y, self.m)
  50. self.labeled = labeled
  51. if len(self.m.shape) > 1:
  52. self.nc = self.m.shape[1]
  53. self.y = np.array(y)
  54. else:
  55. self.nc = 2
  56. self.y = y
  57. self.unlabeled = list(filter(lambda x: x not in self.labeled, range(self.C.shape[0])))
  58. return
  59. def update_model(self, Q, yQ, exact=False):
  60. if self.m is None or self.C is None:
  61. print("Previous model not defined, so assuming you are passing in initial labeled set and labelings...")
  62. self.calculate_model(Q, yQ)
  63. return
  64. if not exact:
  65. for k,yk in zip(Q, yQ):
  66. if self.modelname == 'gr':
  67. if self.nc > 2:
  68. self.m += np.outer(self.C[:, k],(yk - self.m[k,:])/(self.gamma**2 + self.C[k,k]))
  69. else:
  70. self.m += (yk - self.m[k])/(self.gamma**2 + self.C[k,k])*self.C[:, k]
  71. self.C -= np.outer(self.C[:,k], self.C[:,k])/(self.gamma**2. + self.C[k,k])
  72. elif self.modelname == 'probit-log' or self.modelname == 'log':
  73. self.m -= jac_calc2(self.m[k], yk, self.gamma) / (1. + self.C[k,k] * hess_calc2(self.m[k], yk, self.gamma))*self.C[k,:]
  74. self.C -= hess_calc2(self.m[k], yk, self.gamma)/(1. + self.C[k,k] * hess_calc2(self.m[k], yk, self.gamma))*np.outer(self.C[k,:], self.C[k,:])
  75. elif self.modelname == 'probit-norm' or self.modelname == 'probitnorm':
  76. self.m -= jac_calc(self.m[k], yk, self.gamma) / (1. + self.C[k,k] * hess_calc(self.m[k], yk, self.gamma))*self.C[k,:]
  77. self.C -= hess_calc(self.m[k], yk, self.gamma)/(1. + self.C[k,k]*hess_calc(self.m[k], yk, self.gamma))*np.outer(self.C[k,:], self.C[k,:])
  78. else:
  79. raise ValueError("model name %s not recognized or implemented" % str(self.modelname))
  80. self.unlabeled.remove(k)
  81. self.labeled += list(Q)
  82. if self.nc > 2:
  83. self.y = np.concatenate((self.y, yQ))
  84. else:
  85. self.y += list(yQ)
  86. else:
  87. self.labeled += list(Q)
  88. if self.nc > 2:
  89. self.y = np.concatenate((self.y, yQ))
  90. else:
  91. self.y += list(yQ)
  92. self.calculate_model(self.labeled, self.y)
  93. return
  94. def get_m(self, Z, y):
  95. if self.modelname == "probit-norm" or self.modelname == 'probitnorm':
  96. if len(y) <= len(self.w):
  97. return probit_map_dr(Z, y, self.gamma, self.Ct)
  98. else:
  99. return probit_map_st(Z, y, self.gamma, self.d, self.v)
  100. elif self.modelname == "probit-log" or self.modelname == 'log':
  101. if len(y) <= len(self.w):
  102. return probit_map_dr2(Z, y, self.gamma, self.Ct)
  103. else:
  104. return probit_map_st2(Z, y, self.gamma, self.d, self.v)
  105. elif self.modelname == "gr":
  106. #return gr_map(Z, y, self.gamma, self.Ct)
  107. return gr_map(Z, y, self.gamma, self.d, self.v)
  108. else:
  109. raise ValueError("did not recognize modelname = %s" % self.modelname)
  110. def get_C(self, Z, y, m):
  111. if self.modelname == "probit-norm" or self.modelname == 'probitnorm':
  112. if len(y) <= len(self.w) or not self.trunc:
  113. return Hess_inv(m, Z, y, self.gamma, self.Ct)
  114. else:
  115. return Hess_inv_st(m, Z, y, self.d, self.v, self.gamma)
  116. elif self.modelname == "probit-log" or self.modelname == 'log':
  117. if len(y) <= len(self.w) or not self.trunc:
  118. return Hess2_inv(m, Z, y, self.gamma, self.Ct)
  119. else:
  120. return Hess_inv_st2(m, Z, y, self.d, self.v, self.gamma)
  121. elif self.modelname == "gr":
  122. return gr_C(Z, self.gamma, self.d, self.v)
  123. else:
  124. raise ValueError("did not recognize modelname = %s" % self.modelname)
  125. class BinaryGraphBasedSSLModelReduced(object):
  126. '''
  127. '''
  128. def __init__(self, modelname, gamma, tau, v=None, w=None, t=None):
  129. self.gamma = gamma
  130. self.tau = tau
  131. if v is None:
  132. raise ValueError("Need to provide the eigenvectors in the variable 'v'")
  133. if w is None:
  134. raise ValueError("Need to provide the eigenvalues in the variable 'w'")
  135. self.v = v
  136. self.trunc = True
  137. if self.v.shape[0] == self.v.shape[1]:
  138. print("Warning : It appears that you've given the full spectrum, this class is not optimized for that case...")
  139. self.w = w
  140. if t is not None: # heat kernel option, self.d is the diagonal of Lambda (evals) matrix
  141. assert type(t) in [int, float], "Parameter 't' must either be of type int or float"
  142. self.d = np.exp(t*self.w)
  143. else:
  144. #self.d = (self.tau ** (-2.)) * ((self.w + self.tau**2.))
  145. self.d = self.w + self.tau**2.
  146. if modelname not in VALID_MODELS:
  147. raise ValueError("%s is not a valid modelname, must be in %s" % (modelname, str(VALID_MODELS)))
  148. self.full_storage = False
  149. self.modelname = modelname
  150. self.m = None
  151. self.alpha = None
  152. self.C_a = None
  153. return
  154. def calculate_model(self, labeled, y):
  155. if self.modelname in ['gr', 'mgr']:
  156. self.C_a = sp.linalg.inv(np.diag(self.d) + self.v[labeled,:].T @ self.v[labeled,:] / (self.gamma**2.))
  157. self.alpha = self.C_a @ self.v[labeled,:].T @ np.array(y) / (self.gamma**2.)
  158. else:
  159. self.alpha = self.get_alpha(labeled, y)
  160. self.C_a = self.get_C_alpha(labeled, y)
  161. self.m = self.v @ self.alpha
  162. self.labeled = labeled
  163. if len(self.alpha.shape) > 1:
  164. self.nc = self.alpha.shape[1]
  165. self.y = np.array(y)
  166. else:
  167. self.nc = 2
  168. self.y = y
  169. self.unlabeled = list(filter(lambda x: x not in self.labeled, range(self.v.shape[0])))
  170. return
  171. def update_model(self, Q, yQ, exact=False):
  172. if self.alpha is None or self.C_a is None:
  173. print("Previous model not defined, so assuming you are passing in initial labeled set and labelings...")
  174. self.calculate_model(Q, yQ)
  175. return
  176. if not exact:
  177. for k, yk in zip(Q, yQ):
  178. C_a_vk = self.C_a @ self.v[k,:]
  179. ip = np.inner(self.v[k,:], C_a_vk)
  180. mk = np.inner(self.v[k,:], self.alpha.T)
  181. if self.modelname in ['gr', 'mgr']:
  182. if self.nc > 2:
  183. self.alpha += np.outer(C_a_vk, (yk - mk)/(self.gamma**2 + ip))
  184. else:
  185. self.alpha += (yk - mk)/(self.gamma**2 + ip) * C_a_vk
  186. self.C_a -= np.outer(C_a_vk, C_a_vk)/(self.gamma**2. + ip)
  187. elif self.modelname == 'probit-log' or self.modelname == 'log':
  188. self.alpha -= jac_calc2(mk, yk, self.gamma) / (1. + ip * hess_calc2(mk, yk, self.gamma))*C_a_vk
  189. mk = np.inner(self.v[k,:], self.alpha)
  190. self.C_a -= hess_calc2(mk, yk, self.gamma)/(1. + ip * hess_calc2(mk, yk, self.gamma))*np.outer(C_a_vk, C_a_vk)
  191. elif self.modelname == 'probit-norm' or self.modelname == 'probitnorm':
  192. self.alpha -= jac_calc(mk, yk, self.gamma) / (1. + ip * hess_calc(mk, yk, self.gamma))*C_a_vk
  193. mk = np.inner(self.v[k,:], self.alpha)
  194. self.C_a -= hess_calc(mk, yk, self.gamma)/(1. + ip * hess_calc(mk, yk, self.gamma))*np.outer(C_a_vk, C_a_vk)
  195. else:
  196. raise ValueError("model name %s not recognized or implemented" % str(model))
  197. try:
  198. self.unlabeled.remove(k)
  199. except:
  200. print(k)
  201. print(Q)
  202. print(self.labeled)
  203. #print(self.unlabeled)
  204. self.m = self.v @ self.alpha
  205. self.labeled += list(Q)
  206. if self.nc > 2:
  207. self.y = np.concatenate((self.y, yQ))
  208. else:
  209. self.y += list(yQ)
  210. else:
  211. self.labeled += list(Q)
  212. if self.nc > 2:
  213. self.y = np.concatenate((self.y, yQ))
  214. else:
  215. self.y += list(yQ)
  216. self.calculate_model(self.labeled, self.y)
  217. return
  218. def get_alpha(self, Z, y):
  219. '''
  220. '''
  221. if self.modelname == "probit-norm" or self.modelname == 'probitnorm':
  222. return probit_map_st_alpha(Z, y, self.gamma, self.d, self.v)
  223. elif self.modelname == "probit-log" or self.modelname == 'log':
  224. return probit_map_st2_alpha(Z, y, self.gamma, self.d, self.v)
  225. elif self.modelname in ["gr", "mgr"]:
  226. vZ = self.v[Z, :]
  227. C_a = np.diag(self.d) + vZ.T @ vZ / (self.gamma**2.)
  228. return (1. / self.gamma**2.)* sp.linalg.inv(C_a) @ vZ.T @ np.array(y)
  229. else:
  230. pass
  231. def get_C_alpha(self, Z, y):
  232. '''
  233. '''
  234. if self.modelname == "probit-norm" or self.modelname == 'probitnorm':
  235. return Hess_inv_st_alpha(self.alpha, y, 1./self.d, self.v[Z,:], self.gamma)
  236. elif self.modelname == "probit-log" or self.modelname == 'log':
  237. return Hess_inv_st2_alpha(self.alpha, y, 1./self.d, self.v[Z,:], self.gamma)
  238. elif self.modelname in ["gr", "mgr"]:
  239. vZ = self.v[Z, :]
  240. C_a = np.diag(self.d) + vZ.T @ vZ / (self.gamma**2.)
  241. return sp.linalg.inv(C_a)
  242. else:
  243. pass
  244. class CrossEntropyGraphBasedSSLModelReduced(object):
  245. '''
  246. Softmax model implementation is only available in the reduced case.
  247. '''
  248. def __init__(self, gamma, tau, v=None, w=None, t=None):
  249. self.gamma = gamma
  250. self.tau = tau
  251. if v is None:
  252. raise ValueError("Need to provide the eigenvectors in the variable 'v'")
  253. if w is None:
  254. raise ValueError("Need to provide the eigenvalues in the variable 'w'")
  255. self.v = v
  256. self.trunc = True
  257. self.N, self.M = self.v.shape
  258. if self.v.shape[0] == self.v.shape[1]:
  259. print("Warning : It appears that you've given the full spectrum, this class is not optimized for that case...")
  260. self.w = w
  261. if t is not None: # heat kernel option, self.d is the diagonal of Lambda (evals) matrix
  262. assert type(t) in [int, float], "Parameter 't' must either be of type int or float"
  263. self.d = np.exp(t*self.w)
  264. else:
  265. #self.d = (self.tau ** (-2.)) * (self.w + self.tau**2.)
  266. self.d = self.w + self.tau**2.
  267. self.full_storage = False
  268. self.modelname = "ce"
  269. self.m = None
  270. self.alpha = None
  271. self.C_a = None
  272. return
  273. def calculate_model(self, labeled, y):
  274. self.nc = y.shape[1]
  275. self.alpha, H_a = self.get_alpha(labeled, y)
  276. self.C_a = sp.linalg.inv(H_a)
  277. self.m = self.v @ (self.alpha.reshape(self.nc, self.M).T)
  278. self.y = np.array(y)
  279. self.labeled = labeled
  280. self.unlabeled = list(filter(lambda x: x not in self.labeled, range(self.N)))
  281. return
  282. def update_model(self, Q, yQ, exact=True):
  283. if self.alpha is None or self.C_a is None:
  284. print("Previous model not defined, so assuming you are passing in initial labeled set and labelings...")
  285. self.calculate_model(Q, yQ)
  286. return
  287. if not exact:
  288. raise NotImplementedError("Have not implemented inexact NA updates for this model yet")
  289. else:
  290. #print("-- Updating CE model Exactly --")
  291. self.labeled += list(Q)
  292. self.y = np.concatenate((self.y, yQ))
  293. self.calculate_model(self.labeled, self.y)
  294. return
  295. def get_alpha(self, Z, y, newton=True):
  296. ''' Calculate the SoftMax MAP estimator
  297. Z : list of labeled nodes
  298. y : (len(Z), nc) numpy array of onehot vectors as rows
  299. '''
  300. #print('newton = {}'.format(newton))
  301. y = np.array(y) # in case y is a numpy matrix instead of numpy array
  302. vZ_ = self.v[Z,:]
  303. def f(x):
  304. pi_Z = np.exp(vZ_ @ x.reshape(self.nc, self.M).T)
  305. pi_Z /= np.sum(pi_Z, axis=1)[:, np.newaxis]
  306. vec = np.empty(self.M*self.nc)
  307. for c in range(self.nc):
  308. vec[c*self.M:(c+1)*self.M] = vZ_.T @ (pi_Z[:,c] - y[:,c])
  309. return np.tile(self.gamma*self.d, (1,self.nc)).flatten() * x + vec
  310. def fprime(x):
  311. pi_Z = np.exp(vZ_ @ x.reshape(self.nc, self.M).T)
  312. pi_Z /= np.sum(pi_Z, axis=1)[:, np.newaxis]
  313. H = np.empty((self.M*self.nc, self.M*self.nc))
  314. for c in range(self.nc):
  315. for m in range(c,self.nc):
  316. Bmc = 1.*(m == c)*pi_Z[:,c] - pi_Z[:,c]*pi_Z[:,m]
  317. H[c*self.M:(c+1)*self.M, m*self.M:(m+1)*self.M] = (vZ_.T * Bmc) @ vZ_
  318. if m != c:
  319. H[m*self.M:(m+1)*self.M, c*self.M:(c+1)*self.M] = H[c*self.M:(c+1)*self.M, m*self.M:(m+1)*self.M]
  320. H /= self.gamma
  321. H.ravel()[::(self.M*self.nc+1)] += np.tile(self.gamma*self.d, (1, self.nc)).flatten()
  322. return H
  323. x0 = np.random.randn(self.N, self.nc)
  324. x0[Z,:] = y
  325. x0 = (self.v.T @ x0).T.flatten()
  326. if newton:
  327. #print("Second Order")
  328. res = root(f, x0, jac=fprime, tol=1e-9)
  329. else:
  330. #print("First Order")
  331. res = root(f, x0, tol=1e-10, method='krylov')
  332. if not res.success:
  333. print(res.success)
  334. print(np.linalg.norm(f(res.x)))
  335. print(res.message)
  336. res = root(f,x0, tol=1e-10, method='krylov')
  337. # return both alpha and H_a, the Hessian at alpha
  338. return res.x, fprime(res.x)
  339. def get_alpha_old(self, Z, y, newton=True):
  340. ''' Calculate the SoftMax MAP estimator
  341. Z : list of labeled nodes
  342. y : (len(Z), nc) numpy array of onehot vectors as rows
  343. '''
  344. print('newton = {}'.format(newton))
  345. y = np.array(y) # in case y is a numpy matrix instead of numpy array
  346. vZ_ = self.v[Z,:]/self.gamma
  347. def f(x):
  348. pi_Z = np.exp(vZ_ @ x.reshape(self.nc, self.M).T)
  349. print(np.max(np.max(pi_Z)), np.min(np.min(pi_Z)))
  350. print(np.max(np.sum(pi_Z, axis=1)), np.min(np.sum(pi_Z, axis=1)))
  351. pi_Z /= np.sum(pi_Z, axis=1)[:, np.newaxis]
  352. vec = np.empty(self.M*self.nc)
  353. for c in range(self.nc):
  354. vec[c*self.M:(c+1)*self.M] = vZ_.T @ (pi_Z[:,c] - y[:,c])
  355. return np.tile(self.d, (1,self.nc)).flatten() * x + vec
  356. def fprime(x):
  357. pi_Z = np.exp(vZ_ @ x.reshape(self.nc, self.M).T)
  358. pi_Z /= np.sum(pi_Z, axis=1)[:, np.newaxis]
  359. H = np.empty((self.M*self.nc, self.M*self.nc))
  360. for c in range(self.nc):
  361. for m in range(c,self.nc):
  362. Bmc = 1.*(m == c)*pi_Z[:,c] - pi_Z[:,c]*pi_Z[:,m]
  363. H[c*self.M:(c+1)*self.M, m*self.M:(m+1)*self.M] = (vZ_.T * Bmc) @ vZ_
  364. if m != c:
  365. H[m*self.M:(m+1)*self.M, c*self.M:(c+1)*self.M] = H[c*self.M:(c+1)*self.M, m*self.M:(m+1)*self.M]
  366. H.ravel()[::(self.M*self.nc+1)] += np.tile(self.d, (1, self.nc)).flatten()
  367. return H
  368. x0 = np.random.randn(self.N, self.nc)
  369. x0[Z,:] = y
  370. x0 = (self.v.T @ x0).T.flatten()
  371. if newton:
  372. #print("Second Order")
  373. res = root(f, x0, jac=fprime, tol=1e-9)
  374. else:
  375. #print("First Order")
  376. res = root(f, x0, tol=1e-10, method='krylov')
  377. if not res.success:
  378. print(res.success)
  379. print(np.linalg.norm(f(res.x)))
  380. print(res.message)
  381. res = root(f,x0, tol=1e-10, method='krylov')
  382. # return both alpha and H_a, the Hessian at alpha
  383. return res.x, fprime(res.x)
  384. #
  385. # ##################################################################################
  386. # ################### Helper Functions for Reduced Model ###########################
  387. # ##################################################################################
  388. #
  389. #
  390. # def Hess_inv_st2_alpha(alpha, y, d, v_Z, gamma):
  391. # '''
  392. # This method keeps everything in the "alpha"-space, of dimension num_eig x num_eig
  393. # '''
  394. # Dprime = sp.sparse.diags([1./hess_calc2(np.inner(v_Z[i,:],alpha), yi, gamma) for i, yi in enumerate(y)], format='csr')
  395. # A = d[:, np.newaxis] * ((v_Z.T @ sp.linalg.inv(Dprime + v_Z @ (d[:, np.newaxis] * (v_Z.T))) @ v_Z) * d)
  396. # return np.diag(d) - A
  397. #
  398. #
  399. # def Hess_inv_st_alpha(alpha, y, d, v_Z, gamma):
  400. # '''
  401. # This method keeps everything in the "alpha"-space, of dimension num_eig x num_eig
  402. # '''
  403. # Dprime = sp.sparse.diags([1./hess_calc(np.inner(v_Z[i,:],alpha), yi, gamma) for i, yi in enumerate(y)], format='csr')
  404. # A = d[:, np.newaxis] * ((v_Z.T @ sp.linalg.inv(Dprime + v_Z @ (d[:, np.newaxis] * (v_Z.T))) @ v_Z) * d)
  405. # return np.diag(d) - A
  406. #
  407. # def probit_map_st2_alpha(Z, y, gamma, w, v):
  408. # n,l = v.shape[1], len(y)
  409. # def f(x):
  410. # vec = np.zeros(l)
  411. # for j, (i,yi) in enumerate(zip(Z,y)):
  412. # vec[j] = - jac_calc2(np.inner(v[i,:], x), yi, gamma)
  413. # return w * x - v[Z,:].T @ vec
  414. # def fprime(x):
  415. # vec = np.zeros(l)
  416. # for j, (i,yi) in enumerate(zip(Z, y)):
  417. # vec[j] = -hess_calc2(np.inner(v[i,:], x), yi, gamma)
  418. #
  419. # H = (-v[Z,:].T * vec) @ v[Z,:]
  420. # H[np.diag_indices(n)] += w
  421. # return H
  422. # x0 = np.random.rand(len(w))
  423. # res = root(f, x0, jac=fprime)
  424. # #print(f"Root Finding is successful: {res.success}")
  425. # return res.x
  426. #
  427. # def probit_map_st_alpha(Z, y, gamma, w, v):
  428. # n,l = v.shape[1], len(y)
  429. # def f(x):
  430. # vec = np.zeros(l)
  431. # for j, (i,yi) in enumerate(zip(Z,y)):
  432. # vec[j] = - jac_calc(np.inner(v[i,:], x), yi, gamma)
  433. # return w * x - v[Z,:].T @ vec
  434. # def fprime(x):
  435. # vec = np.zeros(l)
  436. # for j, (i,yi) in enumerate(zip(Z, y)):
  437. # vec[j] = -hess_calc(np.inner(v[i,:], x), yi, gamma)
  438. #
  439. # H = (-v[Z,:].T * vec) @ v[Z,:]
  440. # H[np.diag_indices(n)] += w
  441. # return H
  442. # x0 = np.random.rand(len(w))
  443. # res = root(f, x0, jac=fprime)
  444. # #print(f"Root Finding is successful: {res.success}")
  445. # return res.x
  446. #
  447. class HFGraphBasedSSLModel(object):
  448. ''' We implement the ''full'' HF model for comparisons.
  449. '''
  450. def __init__(self, delta, L):
  451. self.modelname = 'hf'
  452. self.full_storage = True
  453. self.N = L.shape[0]
  454. self.L = L + delta**2. * sp.sparse.eye(self.N)
  455. self.f = None
  456. self.C = None
  457. return
  458. def calculate_model(self, labeled, y):
  459. self.labeled = labeled
  460. self.unlabeled = list(filter(lambda x: x not in self.labeled, range(self.N)))
  461. #print(len(self.unlabeled), len(self.labeled))
  462. self.C = np.linalg.inv(self.L[np.ix_(self.unlabeled, self.unlabeled)].A)
  463. if isinstance(y, list):
  464. self.nc = 2
  465. self.f = np.zeros(self.N)
  466. else:
  467. self.nc = y.shape[1]
  468. self.f = np.zeros((self.N, self.nc))
  469. self.y = y
  470. self.f[self.labeled] = np.array(y)
  471. self.f[self.unlabeled] = -self.C @ self.L[np.ix_(self.unlabeled, self.labeled)] @ np.array(y)
  472. return
  473. def update_model(self, Q, yQ, exact=False):
  474. if self.f is None or self.C is None:
  475. print("Previous model not defined, so assuming you are passing in initial labeled set and labelings...")
  476. self.calculate_model(Q, yQ)
  477. return
  478. # kis = []
  479. # for k, yk in zip(Q, yQ):
  480. # ki = self.unlabeled.index(k)
  481. # kis.append(ki)
  482. # self.f[self.unlabeled] += (yk - self.f[k])/(self.C[ki,ki]) * self.C[ki,:]
  483. # self.C -= np.outer(self.C[ki,:], self.C[ki,:])/self.C[ki,ki]
  484. # unlQ = list(filter(lambda x: x not in kis, range(len(self.unlabeled))))
  485. # for k, yk in zip(Q,yQ):
  486. # self.unlabeled.remove(k)
  487. # self.f[k] = yk
  488. # self.C = self.C[np.ix_(unlQ, unlQ)]
  489. self.labeled += list(Q)
  490. if self.nc > 2:
  491. self.y = np.concatenate((self.y, yQ))
  492. else:
  493. self.y += list(yQ)
  494. Qi = [self.unlabeled.index(k) for k in Q]
  495. unlQi = list(filter(lambda x: x not in Qi, range(len(self.unlabeled))))
  496. CQ_inv = np.linalg.inv(self.C[np.ix_(Qi, Qi)])
  497. self.C = self.C[np.ix_(unlQi, unlQi)] - self.C[np.ix_(unlQi, Qi)] @ CQ_inv @ self.C[np.ix_(Qi, unlQi)]
  498. self.f[Q] = np.array(yQ)
  499. for k in Q:
  500. self.unlabeled.remove(k)
  501. self.f[self.unlabeled] = -self.C @ self.L[np.ix_(self.unlabeled, self.labeled)] @ np.array(self.y)
  502. return
  503. def vopt_vals(self, Cand):
  504. unl_Cand = [self.unlabeled.index(k) for k in Cand]
  505. return np.linalg.norm(self.C[:, unl_Cand], axis=0)**2. / self.C[unl_Cand, unl_Cand]
  506. def sopt_vals(self, Cand):
  507. unl_Cand = [self.unlabeled.index(k) for k in Cand]
  508. return np.sum(self.C[:, unl_Cand], axis=0)/ self.C[unl_Cand, unl_Cand]