# NOTE(review): removed a copy/paste extraction artifact here — the original
# viewer's line numbers (1..1343) had been fused into three garbage lines.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import argparse
import os
import sys
sys.path.append('..')
from util.mlflow_util import load_uri
import mlflow
from cycler import cycler

print(plt.style.available)

# MATPLOTLIB settings
plt.style.use('default')
try:
    plt.style.use('seaborn-deep')
except OSError:
    # The seaborn styles were renamed in matplotlib >= 3.6.
    plt.style.use('seaborn-v0_8-deep')
matplotlib.rcParams['text.usetex'] = True
# 'text.latex.unicode' was deprecated in matplotlib 2.2 and removed in 3.0;
# assigning it unconditionally raises KeyError on modern installs, so guard it.
if 'text.latex.unicode' in matplotlib.rcParams:
    matplotlib.rcParams['text.latex.unicode'] = True
matplotlib.rcParams['axes.prop_cycle'] = cycler(color=['r', 'g', 'b', 'y', 'cyan', 'brown', 'k', 'gray', 'orange', 'purple', 'pink'])

SMALL_SIZE = 16
MEDIUM_SIZE = 22
BIGGER_SIZE = 24
plt.rc('font', size=SMALL_SIZE)          # controls default text sizes
plt.rc('axes', titlesize=SMALL_SIZE)     # fontsize of the axes title
plt.rc('axes', labelsize=26)             # fontsize of the x and y labels
plt.rc('xtick', labelsize=MEDIUM_SIZE)   # fontsize of the tick labels
plt.rc('ytick', labelsize=MEDIUM_SIZE)   # fontsize of the tick labels
plt.rc('legend', fontsize=18)            # legend fontsize
plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title
# Map "<acquisition>-<model>" identifiers to (legend label, marker, color)
# used consistently across all accuracy plots below.
acq_model2label_marker_color = {
    'vopt-mgr': ('VOPT-MGR', '+', 'k'),
    'sopt-mgr': ('SOPT-MGR', 'x', 'brown'),
    'sopt-hf': ('SOPT-HF', 'v', 'pink'),
    'vopt-hf': ('VOPT-HF', '^', 'm'),
    'vopt-gr': ('VOPT-GR', '+', 'k'),
    'sopt-gr': ('SOPT-GR', 'x', 'brown'),
    'mc-mgr': ('MC-MGR', '*', 'g'),
    'mcgreedy-ce': ('MCG-CE', 'v', 'cyan'),
    'mc-ce': ('MC-CE', 'v', 'b'),
    'mc-probitnorm': ('MC-P', 's', 'y'),
    'mc-gr': ('MC-GR', '*', 'g'),
    'mc-log': ('MC-LOG', 'o', 'b'),
    'rand-mgr': ('RAND-MGR', '<', 'purple'),
    'rand-log': ('RAND-LOG', '<', 'purple'),
    'rand-ce': ('RAND-CE', '<', 'purple'),
    'rand-gr': ('RAND-GR', '<', 'purple'),
    'db-rkhs': ('DB-RKHS', '>', 'r'),
    'uncertainty-mgr': ('UNC-MGR', 'P', 'orange'),
    'uncertainty-gr': ('UNC-GR', 'P', 'orange'),
    'uncertainty-ce': ('UNC-CE', 'X', 'gray'),
    'uncertainty-log': ('UNC-LOG', 'X', 'gray'),
    'uncertainty-probitnorm': ('UNC-P', 'd', 'gray'),
}
mlflow.set_tracking_uri('../mlruns')
client = mlflow.tracking.MlflowClient()
save_root = './for-paper/'

# ## Binary Clusters
# +
exp_name = 'sequential-binary-clusters'
exp_save_root = os.path.join(save_root, exp_name)
if not os.path.exists(exp_save_root):
    os.makedirs(exp_save_root)
experiment = client.get_experiment_by_name(exp_name)
query = 'attributes.status = "FINISHED"'
all_runs = client.search_runs(experiment.experiment_id, filter_string=query)
print(len(all_runs))
# Run names look like "<acq>-<model>-<seed>"; drop the trailing seed token.
setup_names = sorted(set(['-'.join(r.data.tags['mlflow.runName'].split('-')[:-1]) for r in all_runs]))
print(setup_names)
# +
# Plot figure: average accuracy vs. number of labeled points per setup.
skip = 2
first = True
not_plot = ['rand-gr', 'rand-probitnorm', 'uncertainty-gr', 'uncertainty-probitnorm', 'vopt-gr', 'sopt-gr']
plt.figure(figsize=(8, 6))
for setup_name in setup_names:
    if '-'.join(setup_name.split('-')[:2]) in not_plot:
        continue

    runs = [r for r in all_runs if setup_name in r.data.tags['mlflow.runName']]
    acq_model = '-'.join(runs[0].data.tags['mlflow.runName'].split('-')[:2])
    lbl, mrkr, clr = acq_model2label_marker_color[acq_model]

    al_iters = int(runs[0].data.params['al_iters'])
    ACC = np.zeros(al_iters + 1)

    print(len(runs), setup_name)
    # Accumulate accuracy curves (init accuracy + per-iteration accuracy).
    for r in runs:
        acc = np.array([r.data.metrics['init_acc']])
        iter_stats = load_uri(os.path.join(r.info.artifact_uri, 'iter_stats.npz'))
        ACC += np.concatenate((acc, iter_stats['iter_acc']))

    ACC /= float(len(runs))

    if first:
        # NOTE(review): relies on `r` leaking out of the loop above (last run).
        num_init_labeled = len(load_uri(os.path.join(r.info.artifact_uri, 'init_labeled.npy')))
        B = int(runs[0].data.params['B'])
        dom = [num_init_labeled + B * i for i in range(al_iters + 1)]
        first = False

    plt.scatter(dom[:50:skip], ACC[:50:skip], marker=mrkr, label=lbl, s=50, c=clr)
    plt.plot(dom[:50:skip], ACC[:50:skip], linewidth=0.9, c=clr)

plt.legend()
plt.xlabel(r"Number of labeled points, $|\mathcal{L}|$")
plt.ylabel("Accuracy")
plt.tight_layout()
# plt.savefig('{}/acc.pdf'.format(exp_save_root))
plt.show()
# -
checkdata = np.load('../data/binary_clusters2/X_labels.npz', allow_pickle=True)
X = checkdata['X']
labels = checkdata['labels']
clrs = np.array(X.shape[0] * ['r'])
clrs[labels == 0] = 'b'
for r in all_runs:
    print(r.data.tags['mlflow.runName'])
    # Only visualize the seed-0 run of each setup.
    if r.data.tags['mlflow.runName'][-1] != '0':
        continue
    iter_stats = load_uri(os.path.join(r.info.artifact_uri, 'iter_stats.npz'))
    choices = iter_stats['al_choices'].flatten()
    init_labeled = load_uri(os.path.join(r.info.artifact_uri, 'init_labeled.npy'))
    choices = np.concatenate((init_labeled, choices))[:350]
    if np.max(choices) > 1999:
        print('found checker run with more than 2000 nodes')
        continue
    plt.figure(figsize=(5, 5))
    plt.scatter(X[:, 0], X[:, 1], c=clrs)
    plt.scatter(X[choices[:50], 0], X[choices[:50], 1], marker='*', s=90, c='gold', linewidths=0.6, edgecolors='k')
    # plt.title(r.data.tags['mlflow.runName'])
    plt.xticks([], [])
    plt.yticks([], [])
    plt.savefig('{}/{}.pdf'.format(exp_save_root, r.data.tags['mlflow.runName']))
    plt.show()
# +
# Redo of experiment -- changed tau = 0.001 and gamma = 0.5
exp_name = 'sequential-binary-clusters3'
exp_save_root = os.path.join(save_root, exp_name)
if not os.path.exists(exp_save_root):
    os.makedirs(exp_save_root)
experiment = client.get_experiment_by_name(exp_name)
query = 'attributes.status = "FINISHED"'
all_runs = client.search_runs(experiment.experiment_id, filter_string=query)
print(len(all_runs))
setup_names = sorted(set(['-'.join(r.data.tags['mlflow.runName'].split('-')[:-1]) for r in all_runs]))
print(setup_names)
# +
# Plot figure: average accuracy vs. number of labeled points per setup.
skip = 2
first = True
not_plot = ['rand-gr', 'rand-probitnorm', 'uncertainty-gr', 'uncertainty-probitnorm', 'vopt-gr', 'sopt-gr']
plt.figure(figsize=(8, 6))
for setup_name in setup_names:
    if '-'.join(setup_name.split('-')[:2]) in not_plot:
        continue

    runs = [r for r in all_runs if setup_name in r.data.tags['mlflow.runName']]
    acq_model = '-'.join(runs[0].data.tags['mlflow.runName'].split('-')[:2])
    lbl, mrkr, clr = acq_model2label_marker_color[acq_model]

    al_iters = int(runs[0].data.params['al_iters'])
    ACC = np.zeros(al_iters + 1)

    print(len(runs), setup_name)
    for r in runs:
        acc = np.array([r.data.metrics['init_acc']])
        iter_stats = load_uri(os.path.join(r.info.artifact_uri, 'iter_stats.npz'))
        ACC += np.concatenate((acc, iter_stats['iter_acc']))

    ACC /= float(len(runs))

    if first:
        # NOTE(review): relies on `r` leaking out of the loop above (last run).
        num_init_labeled = len(load_uri(os.path.join(r.info.artifact_uri, 'init_labeled.npy')))
        B = int(runs[0].data.params['B'])
        dom = [num_init_labeled + B * i for i in range(al_iters + 1)]
        first = False

    plt.scatter(dom[:50:skip], ACC[:50:skip], marker=mrkr, label=lbl, s=50, c=clr)
    plt.plot(dom[:50:skip], ACC[:50:skip], linewidth=0.9, c=clr)

plt.legend()
plt.xlabel(r"Number of labeled points, $|\mathcal{L}|$")
plt.ylabel("Accuracy")
plt.tight_layout()
plt.savefig('{}/acc.pdf'.format(exp_save_root))
plt.show()
# -
checkdata = np.load('../data/binary_clusters2/X_labels.npz', allow_pickle=True)
X = checkdata['X']
labels = checkdata['labels']
clrs = np.array(X.shape[0] * ['r'])
clrs[labels == 0] = 'b'
for r in all_runs:
    print(r.data.tags['mlflow.runName'])
    # Only visualize the seed-0 run of each setup.
    if r.data.tags['mlflow.runName'][-1] != '0':
        continue
    iter_stats = load_uri(os.path.join(r.info.artifact_uri, 'iter_stats.npz'))
    choices = iter_stats['al_choices'].flatten()
    init_labeled = load_uri(os.path.join(r.info.artifact_uri, 'init_labeled.npy'))
    choices = np.concatenate((init_labeled, choices))[:350]
    if np.max(choices) > 1999:
        print('found checker run with more than 2000 nodes')
        continue
    plt.figure(figsize=(5, 5))
    plt.scatter(X[:, 0], X[:, 1], c=clrs)
    plt.scatter(X[choices[:50], 0], X[choices[:50], 1], marker='*', s=90, c='gold', linewidths=0.6, edgecolors='k')
    # plt.title(r.data.tags['mlflow.runName'])
    plt.xticks([], [])
    plt.yticks([], [])
    plt.savefig('{}/{}.pdf'.format(exp_save_root, r.data.tags['mlflow.runName']))
    plt.show()
# # Binary Clusters 3 - Sequential
# +
# Redo of experiment -- changed tau = 0.001 and gamma = 0.5
exp_name = 'binary-clusters3-sequential'
exp_save_root = os.path.join(save_root, exp_name)
if not os.path.exists(exp_save_root):
    os.makedirs(exp_save_root)
experiment = client.get_experiment_by_name(exp_name)
query = 'attributes.status = "FINISHED"'
all_runs = client.search_runs(experiment.experiment_id, filter_string=query)
print(len(all_runs))
setup_names = sorted(set(['-'.join(r.data.tags['mlflow.runName'].split('-')[:-1]) for r in all_runs]))
print(setup_names)
# +
# Plot figure: average accuracy vs. number of labeled points per setup.
skip = 2
first = True
not_plot = ['rand-gr', 'rand-probitnorm', 'uncertainty-gr', 'uncertainty-probitnorm', 'vopt-gr', 'sopt-gr']
plt.figure(figsize=(8, 6))
for setup_name in setup_names:
    if '-'.join(setup_name.split('-')[:2]) in not_plot:
        continue

    runs = [r for r in all_runs if setup_name in r.data.tags['mlflow.runName']]
    acq_model = '-'.join(runs[0].data.tags['mlflow.runName'].split('-')[:2])
    lbl, mrkr, clr = acq_model2label_marker_color[acq_model]

    al_iters = int(runs[0].data.params['al_iters'])
    ACC = np.zeros(al_iters + 1)

    print(len(runs), setup_name)
    for r in runs:
        acc = np.array([r.data.metrics['init_acc']])
        iter_stats = load_uri(os.path.join(r.info.artifact_uri, 'iter_stats.npz'))
        ACC += np.concatenate((acc, iter_stats['iter_acc']))

    ACC /= float(len(runs))

    if first:
        # NOTE(review): relies on `r` leaking out of the loop above (last run).
        num_init_labeled = len(load_uri(os.path.join(r.info.artifact_uri, 'init_labeled.npy')))
        B = int(runs[0].data.params['B'])
        dom = [num_init_labeled + B * i for i in range(al_iters + 1)]
        first = False

    plt.scatter(dom[:100:skip], ACC[:100:skip], marker=mrkr, label=lbl, s=50, c=clr)
    plt.plot(dom[:100:skip], ACC[:100:skip], linewidth=0.9, c=clr)

plt.legend()
plt.xlabel(r"Number of labeled points, $|\mathcal{L}|$")
plt.ylabel("Accuracy")
plt.tight_layout()
plt.savefig('{}/acc.pdf'.format(exp_save_root))
plt.show()
# -
checkdata = np.load('../data/binary_clusters3/X_labels.npz', allow_pickle=True)
X = checkdata['X']
labels = checkdata['labels']
clrs = np.array(X.shape[0] * ['r'])
clrs[labels == 0] = 'b'
for r in all_runs:
    print(r.data.tags['mlflow.runName'])
    # Only visualize the seed-0 run of each setup.
    if r.data.tags['mlflow.runName'][-1] != '0':
        continue
    iter_stats = load_uri(os.path.join(r.info.artifact_uri, 'iter_stats.npz'))
    choices = iter_stats['al_choices'].flatten()
    init_labeled = load_uri(os.path.join(r.info.artifact_uri, 'init_labeled.npy'))
    choices = np.concatenate((init_labeled, choices))[:350]
    if np.max(choices) > 1999:
        print('found checker run with more than 2000 nodes')
        continue
    plt.figure(figsize=(5, 5))
    plt.scatter(X[:, 0], X[:, 1], c=clrs)
    plt.scatter(X[choices[:50], 0], X[choices[:50], 1], marker='*', s=90, c='gold', linewidths=0.6, edgecolors='k')
    # plt.title(r.data.tags['mlflow.runName'])
    plt.xticks([], [])
    plt.yticks([], [])
    plt.savefig('{}/{}.pdf'.format(exp_save_root, r.data.tags['mlflow.runName']))
    plt.show()

# Ground-truth clustering figure (no active-learning choices overlaid).
plt.figure(figsize=(5, 5))
plt.scatter(X[:, 0], X[:, 1], c=clrs)
plt.xticks([], [])
plt.yticks([], [])
plt.savefig('{}/{}.pdf'.format(exp_save_root, 'bc-gt'))
plt.show()
# # Binary Clusters3 - Batch
# +
# Redo of experiment -- changed tau = 0.001 and gamma = 0.5
exp_name = 'binary-clusters3-batch'
exp_save_root = os.path.join(save_root, exp_name)
if not os.path.exists(exp_save_root):
    os.makedirs(exp_save_root)
experiment = client.get_experiment_by_name(exp_name)
query = 'attributes.status = "FINISHED"'
all_runs = client.search_runs(experiment.experiment_id, filter_string=query)
print(len(all_runs))
setup_names = sorted(set(['-'.join(r.data.tags['mlflow.runName'].split('-')[:-1]) for r in all_runs]))
print(setup_names)
# +
# Plot figure: average accuracy vs. number of labeled points per setup.
skip = 1
first = True
not_plot = ['rand-gr', 'rand-probitnorm', 'uncertainty-gr', 'uncertainty-probitnorm', 'vopt-gr', 'sopt-gr']
plt.figure(figsize=(8, 6))
for setup_name in setup_names:
    if '-'.join(setup_name.split('-')[:2]) in not_plot:
        continue

    runs = [r for r in all_runs if setup_name in r.data.tags['mlflow.runName']]
    acq_model = '-'.join(runs[0].data.tags['mlflow.runName'].split('-')[:2])
    lbl, mrkr, clr = acq_model2label_marker_color[acq_model]

    al_iters = int(runs[0].data.params['al_iters'])
    ACC = np.zeros(al_iters + 1)

    print(len(runs), setup_name)
    for r in runs:
        acc = np.array([r.data.metrics['init_acc']])
        iter_stats = load_uri(os.path.join(r.info.artifact_uri, 'iter_stats.npz'))
        ACC += np.concatenate((acc, iter_stats['iter_acc']))

    ACC /= float(len(runs))

    if first:
        # NOTE(review): relies on `r` leaking out of the loop above (last run).
        num_init_labeled = len(load_uri(os.path.join(r.info.artifact_uri, 'init_labeled.npy')))
        B = int(runs[0].data.params['B'])
        dom = [num_init_labeled + B * i for i in range(al_iters + 1)]
        first = False

    plt.scatter(dom[:20:skip], ACC[:20:skip], marker=mrkr, label=lbl, s=50, c=clr)
    plt.plot(dom[:20:skip], ACC[:20:skip], linewidth=0.9, c=clr)

plt.legend()
plt.xlabel(r"Number of labeled points, $|\mathcal{L}|$")
plt.ylabel("Accuracy")
plt.tight_layout()
plt.savefig('{}/acc.pdf'.format(exp_save_root))
plt.show()
# -
# Sanity check: dataset colored by raw labels, and compare against the
# "check" copy of the dataset (uses `X`/`labels` loaded in the previous cell).
plt.scatter(X[:, 0], X[:, 1], c=labels)
plt.show()
checkdata2 = np.load('../data/binary_clusters_check/X_labels.npz', allow_pickle=True)
X2 = checkdata2['X']
labels2 = checkdata2['labels']
print(np.allclose(X2, X))
# ## Checker 2
# +
exp_name = 'checker2'
exp_save_root = os.path.join(save_root, exp_name)
if not os.path.exists(exp_save_root):
    os.makedirs(exp_save_root)
experiment = client.get_experiment_by_name(exp_name)
query = 'attributes.status = "FINISHED"'
all_runs = client.search_runs(experiment.experiment_id, filter_string=query)
print(len(all_runs))
setup_names = sorted(set(['-'.join(r.data.tags['mlflow.runName'].split('-')[:-1]) for r in all_runs]))
print(setup_names)
# +
# Plot figure: average accuracy vs. number of labeled points per setup.
skip = 2
first = True
not_plot = ['rand-gr', 'rand-probitnorm', 'uncertainty-gr', 'uncertainty-probitnorm', 'vopt-gr', 'sopt-gr']
plt.figure(figsize=(8, 6))
for setup_name in setup_names:
    if '-'.join(setup_name.split('-')[:2]) in not_plot:
        continue

    runs = [r for r in all_runs if setup_name in r.data.tags['mlflow.runName']]
    acq_model = '-'.join(runs[0].data.tags['mlflow.runName'].split('-')[:2])
    lbl, mrkr, clr = acq_model2label_marker_color[acq_model]

    al_iters = int(runs[0].data.params['al_iters'])
    ACC = np.zeros(al_iters + 1)

    print(len(runs), setup_name)
    for r in runs:
        acc = np.array([r.data.metrics['init_acc']])
        iter_stats = load_uri(os.path.join(r.info.artifact_uri, 'iter_stats.npz'))
        ACC += np.concatenate((acc, iter_stats['iter_acc']))

    ACC /= float(len(runs))

    if first:
        # NOTE(review): relies on `r` leaking out of the loop above (last run).
        num_init_labeled = len(load_uri(os.path.join(r.info.artifact_uri, 'init_labeled.npy')))
        B = int(runs[0].data.params['B'])
        dom = [num_init_labeled + B * i for i in range(al_iters + 1)]
        first = False

    plt.scatter(dom[::skip], ACC[::skip], marker=mrkr, label=lbl, s=50, c=clr)
    plt.plot(dom[::skip], ACC[::skip], linewidth=0.9, c=clr)

plt.legend()
plt.xlabel(r"Number of labeled points, $|\mathcal{L}|$")
plt.ylabel("Accuracy")
plt.tight_layout()
plt.savefig('{}/acc.pdf'.format(exp_save_root))
plt.show()
# -
checkdata = np.load('../data/checker2/X_labels.npz', allow_pickle=True)
X = checkdata['X']
labels = checkdata['labels']
clrs = np.array(X.shape[0] * ['r'])
clrs[labels == 0] = 'b'
for r in all_runs:
    # Only visualize the seed-0 run of each setup.
    if r.data.tags['mlflow.runName'][-1] != '0':
        continue
    iter_stats = load_uri(os.path.join(r.info.artifact_uri, 'iter_stats.npz'))
    choices = iter_stats['al_choices'].flatten()
    init_labeled = load_uri(os.path.join(r.info.artifact_uri, 'init_labeled.npy'))
    choices = np.concatenate((init_labeled, choices))[:350]
    if np.max(choices) > 1999:
        print('found checker run with more than 2000 nodes')
        continue
    plt.figure(figsize=(5, 5))
    plt.scatter(X[:, 0], X[:, 1], c=clrs)
    plt.scatter(X[choices, 0], X[choices, 1], marker='*', s=90, c='gold', linewidths=0.6, edgecolors='k')
    # plt.title(r.data.tags['mlflow.runName'])
    plt.xticks([], [])
    plt.yticks([], [])
    plt.savefig('{}/{}.pdf'.format(exp_save_root, r.data.tags['mlflow.runName']))
    plt.show()
# # Sequential Checker2
# +
exp_name = 'sequential-checker2'
exp_save_root = os.path.join(save_root, exp_name)
if not os.path.exists(exp_save_root):
    os.makedirs(exp_save_root)
experiment = client.get_experiment_by_name(exp_name)
query = 'attributes.status = "FINISHED"'
all_runs = client.search_runs(experiment.experiment_id, filter_string=query)
print(len(all_runs))
setup_names = sorted(set(['-'.join(r.data.tags['mlflow.runName'].split('-')[:-1]) for r in all_runs]))
print(setup_names)
# +
# Plot figure: average accuracy vs. number of labeled points per setup.
skip = 5
tot = 55
first = True
not_plot = ['rand-gr', 'rand-probitnorm', 'uncertainty-gr', 'uncertainty-probitnorm', 'vopt-gr', 'sopt-gr']
plt.figure(figsize=(7, 6))
for setup_name in setup_names:
    if '-'.join(setup_name.split('-')[:2]) in not_plot:
        continue

    runs = [r for r in all_runs if setup_name in r.data.tags['mlflow.runName']]
    acq_model = '-'.join(runs[0].data.tags['mlflow.runName'].split('-')[:2])
    lbl, mrkr, clr = acq_model2label_marker_color[acq_model]

    al_iters = int(runs[0].data.params['al_iters'])
    ACC = np.zeros(al_iters + 1)

    print(len(runs), setup_name)
    for r in runs:
        acc = np.array([r.data.metrics['init_acc']])
        iter_stats = load_uri(os.path.join(r.info.artifact_uri, 'iter_stats.npz'))
        ACC += np.concatenate((acc, iter_stats['iter_acc']))

    ACC /= float(len(runs))

    if first:
        # NOTE(review): relies on `r` leaking out of the loop above (last run).
        num_init_labeled = len(load_uri(os.path.join(r.info.artifact_uri, 'init_labeled.npy')))
        B = int(runs[0].data.params['B'])
        dom = [num_init_labeled + B * i for i in range(al_iters + 1)]
        first = False

    plt.scatter(dom[::skip][:tot], ACC[::skip][:tot], marker=mrkr, label=lbl, s=40, c=clr)
    plt.plot(dom[::skip][:tot], ACC[::skip][:tot], linewidth=0.9, c=clr)

plt.legend()
plt.xlabel(r"Number of labeled points, $|\mathcal{L}|$")
plt.ylabel("Accuracy")
plt.tight_layout()
plt.savefig('{}/acc.pdf'.format(exp_save_root))
plt.show()
# -
checkdata = np.load('../data/checker2/X_labels.npz', allow_pickle=True)
X = checkdata['X']
labels = checkdata['labels']
clrs = np.array(X.shape[0] * ['r'])
clrs[labels == 0] = 'b'
for r in all_runs:
    # Only visualize the seed-0 run of each setup.
    if r.data.tags['mlflow.runName'][-1] != '0':
        continue
    iter_stats = load_uri(os.path.join(r.info.artifact_uri, 'iter_stats.npz'))
    choices = iter_stats['al_choices'].flatten()
    init_labeled = load_uri(os.path.join(r.info.artifact_uri, 'init_labeled.npy'))
    choices = np.concatenate((init_labeled, choices))[:350]
    if np.max(choices) > 1999:
        print('found checker run with more than 2000 nodes')
        continue
    print(r.data.tags['mlflow.runName'])
    plt.figure(figsize=(5, 5))
    plt.scatter(X[:, 0], X[:, 1], c=clrs)
    plt.scatter(X[choices, 0], X[choices, 1], marker='*', s=110, c='yellow', linewidths=0.2, edgecolors='k')
    # plt.title(r.data.tags['mlflow.runName'])
    plt.xticks([], [])
    plt.yticks([], [])
    plt.savefig('{}/{}.pdf'.format(exp_save_root, r.data.tags['mlflow.runName']),
                bbox_inches='tight', pad_inches=0)

    plt.show()
# +
# Hand-pick one point from each of two neighboring squares of the checkerboard
# (first index found inside each bounding box) to use as initial labels.
checkdata = np.load('../data/checker2/X_labels.npz', allow_pickle=True)
X = checkdata['X']
labels = checkdata['labels']
clrs = np.array(X.shape[0] * ['r'])
clrs[labels == 0] = 'b'
i1, i2 = None, None
i = 0
while (i1 is None or i2 is None) and i < 2000:
    x, y = X[i, 0], X[i, 1]
    if i1 is None:
        if 0.55 <= x <= 0.7 and 0.3 <= y <= 0.45:
            i1 = i

    if i2 is None:
        if 0.3 <= x <= 0.45 and 0.3 <= y <= 0.45:
            i2 = i
    i += 1
plt.figure(figsize=(5, 5))
plt.scatter(X[:, 0], X[:, 1], c=clrs)
# plt.scatter(X[i1, 0], X[i1, 1], marker='*', s=90, c='gold', linewidths=0.4, edgecolors='k')
# plt.scatter(X[i2, 0], X[i2, 1], marker='*', s=90, c='gold', linewidths=0.4, edgecolors='k')
plt.xticks([], [])
plt.yticks([], [])
plt.savefig('{}/gt.pdf'.format(exp_save_root), bbox_inches='tight', pad_inches=0)
plt.show()
# -
labeled_handchosen = [i1, i2]
print(labeled_handchosen)
# +
exp_name = 'handchosen-checker2'
exp_save_root = os.path.join(save_root, exp_name)
if not os.path.exists(exp_save_root):
    os.makedirs(exp_save_root)
experiment = client.get_experiment_by_name(exp_name)
query = 'attributes.status = "FINISHED"'
all_runs = client.search_runs(experiment.experiment_id, filter_string=query)
print(len(all_runs))
setup_names = sorted(set(['-'.join(r.data.tags['mlflow.runName'].split('-')[:-1]) for r in all_runs]))
print(setup_names)
# +
# Plot figure: average accuracy vs. number of labeled points per setup.
skip = 5
# NOTE(review): tot = -1 means dom[::skip][:tot] drops the final plotted
# point — presumably intentional; confirm.
tot = -1
first = True
not_plot = ['rand-gr', 'rand-probitnorm', 'uncertainty-gr', 'uncertainty-probitnorm', 'vopt-gr', 'sopt-gr']
plt.figure(figsize=(7, 6))
for setup_name in setup_names:
    if '-'.join(setup_name.split('-')[:2]) in not_plot:
        continue

    # runs = [r for r in all_runs if setup_name in r.data.tags['mlflow.runName']]
    # Restrict to runs that used the full candidate set.
    runs = [r for r in all_runs if setup_name in r.data.tags['mlflow.runName'] and r.data.params['cand'] == 'full']
    acq_model = '-'.join(runs[0].data.tags['mlflow.runName'].split('-')[:2])
    lbl, mrkr, clr = acq_model2label_marker_color[acq_model]

    al_iters = int(runs[0].data.params['al_iters'])
    ACC = np.zeros(al_iters + 1)

    print(len(runs), setup_name)
    for r in runs:
        acc = np.array([r.data.metrics['init_acc']])
        iter_stats = load_uri(os.path.join(r.info.artifact_uri, 'iter_stats.npz'))
        ACC += np.concatenate((acc, iter_stats['iter_acc']))

    ACC /= float(len(runs))

    if first:
        # NOTE(review): relies on `r` leaking out of the loop above (last run).
        num_init_labeled = len(load_uri(os.path.join(r.info.artifact_uri, 'init_labeled.npy')))
        B = int(runs[0].data.params['B'])
        dom = [num_init_labeled + B * i for i in range(al_iters + 1)]
        first = False

    plt.scatter(dom[::skip][:tot], ACC[::skip][:tot], marker=mrkr, label=lbl, s=40, c=clr)
    plt.plot(dom[::skip][:tot], ACC[::skip][:tot], linewidth=0.9, c=clr)

plt.legend()
plt.xlabel(r"Number of labeled points, $|\mathcal{L}|$")
plt.ylabel("Accuracy")
plt.tight_layout()
# plt.savefig('{}/acc.pdf'.format(exp_save_root))
plt.show()
# -
checkdata = np.load('../data/checker2/X_labels.npz', allow_pickle=True)
X = checkdata['X']
labels = checkdata['labels']
clrs = np.array(X.shape[0] * ['r'])
clrs[labels == 0] = 'b'
for r in all_runs:
    # Only visualize the seed-1, full-candidate run of each setup.
    if r.data.tags['mlflow.runName'][-1] != '1' or r.data.params['cand'] != 'full':
        continue
    iter_stats = load_uri(os.path.join(r.info.artifact_uri, 'iter_stats.npz'))
    choices = iter_stats['al_choices'].flatten()
    init_labeled = load_uri(os.path.join(r.info.artifact_uri, 'init_labeled.npy'))
    choices = np.concatenate((init_labeled, choices))
    if np.max(choices) > 1999:
        print('found checker run with more than 2000 nodes')
        continue
    print(r.data.tags['mlflow.runName'])
    plt.figure(figsize=(5, 5))
    plt.scatter(X[:, 0], X[:, 1], c=clrs)
    plt.scatter(X[choices, 0], X[choices, 1], marker='*', s=100, c='gold', linewidths=0.4, edgecolors='k')
    # plt.title(r.data.tags['mlflow.runName'])
    plt.xticks([], [])
    plt.yticks([], [])
    # plt.savefig('{}/{}.pdf'.format(exp_save_root, r.data.tags['mlflow.runName']))
    plt.show()
# # Binary MNIST
# +
exp_name = 'sequential-binary-mnist'
exp_save_root = os.path.join(save_root, exp_name)
if not os.path.exists(exp_save_root):
    os.makedirs(exp_save_root)
experiment = client.get_experiment_by_name(exp_name)
query = 'attributes.status = "FINISHED"'
all_runs = client.search_runs(experiment.experiment_id, filter_string=query)
print(len(all_runs))
setup_names = sorted(set(['-'.join(r.data.tags['mlflow.runName'].split('-')[:-1]) for r in all_runs]))
print(setup_names)
# +
# Plot figure: average accuracy vs. number of labeled points per setup.
skip = 1
tot = 100
first = True
not_plot = ['rand-log', 'rand-probitnorm', 'uncertainty-log', 'uncertainty-probitnorm']
plt.figure(figsize=(10, 6))
for setup_name in setup_names:
    if '-'.join(setup_name.split('-')[:2]) in not_plot:
        continue

    runs = [r for r in all_runs if setup_name in r.data.tags['mlflow.runName']]
    acq_model = '-'.join(runs[0].data.tags['mlflow.runName'].split('-')[:2])
    lbl, mrkr, clr = acq_model2label_marker_color[acq_model]

    al_iters = int(runs[0].data.params['al_iters'])
    ACC = np.zeros(al_iters + 1)

    print(len(runs), setup_name)
    for r in runs:
        acc = np.array([r.data.metrics['init_acc']])
        iter_stats = load_uri(os.path.join(r.info.artifact_uri, 'iter_stats.npz'))
        ACC += np.concatenate((acc, iter_stats['iter_acc']))

    ACC /= float(len(runs))

    if first:
        # NOTE(review): relies on `r` leaking out of the loop above (last run).
        num_init_labeled = len(load_uri(os.path.join(r.info.artifact_uri, 'init_labeled.npy')))
        B = int(runs[0].data.params['B'])
        dom = [num_init_labeled + B * i for i in range(al_iters + 1)]
        first = False

    plt.scatter(dom[::skip][:tot], ACC[::skip][:tot], marker=mrkr, label=lbl, s=50, c=clr)
    plt.plot(dom[::skip][:tot], ACC[::skip][:tot], linewidth=0.9, c=clr)

plt.legend()
plt.xlabel(r"Number of labeled points, $|\mathcal{L}|$")
plt.ylabel("Accuracy")
plt.tight_layout()
plt.savefig('{}/acc.pdf'.format(exp_save_root))
plt.show()
- # -
- # # Checker 3
- # +
- exp_name = 'checker3'
- exp_save_root = os.path.join(save_root, exp_name)
- if not os.path.exists(exp_save_root):
- os.makedirs(exp_save_root)
- experiment = client.get_experiment_by_name(exp_name)
- query = 'attributes.status = "FINISHED"'
- all_runs = client.search_runs(experiment.experiment_id, filter_string=query)
- print(len(all_runs))
- setup_names = sorted(set(['-'.join(r.data.tags['mlflow.runName'].split('-')[:-1]) for r in all_runs]))
- print(setup_names)
- # +
- # Plot figure
- skip = 3
- first = True
- not_plot = ['rand-mgr', 'uncertainty-mgr']
- plt.figure(figsize=(10,6))
- for setup_name in setup_names:
- if '-'.join(setup_name.split('-')[:2]) in not_plot:
- continue
-
- runs = [r for r in all_runs if setup_name in r.data.tags['mlflow.runName']]
- acq_model = '-'.join(runs[0].data.tags['mlflow.runName'].split('-')[:2])
- lbl, mrkr, clr = acq_model2label_marker_color[acq_model]
-
- al_iters = int(runs[0].data.params['al_iters'])
- ACC = np.zeros(al_iters + 1)
-
- print(len(runs), setup_name)
- for r in runs:
- acc = np.array([r.data.metrics['init_acc']])
- iter_stats = load_uri(os.path.join(r.info.artifact_uri, 'iter_stats.npz'))
- ACC += np.concatenate((acc, iter_stats['iter_acc']))
-
- ACC /= float(len(runs))
-
- if first:
- num_init_labeled = len(load_uri(os.path.join(r.info.artifact_uri, 'init_labeled.npy')))
- B = int(runs[0].data.params['B'])
- dom = [num_init_labeled + B*i for i in range(al_iters+1)]
- first = False
-
- plt.scatter(dom[::skip], ACC[::skip], marker=mrkr, label=lbl, s=50, c=clr)
- plt.plot(dom[::skip], ACC[::skip], linewidth=0.5, c=clr)
-
- plt.legend()
- plt.xlabel("Number of labeled points, $|\mathcal{L}|$")
- plt.ylabel("Accuracy")
- plt.tight_layout()
- #plt.savefig('{}/acc.pdf'.format(exp_save_root))
- plt.show()
# +
# One accuracy figure per underlying classifier family ('mgr' vs 'ce').
skip = 3
first = True
for modelname in ['mgr', 'ce']:
    plt.figure(figsize=(7, 5))
    for setup_name in setup_names:
        # Keep only setups whose model field matches the current family.
        if modelname not in setup_name.split('-')[1]:
            continue
        runs = [r for r in all_runs if setup_name in r.data.tags['mlflow.runName']]
        acq_model = '-'.join(runs[0].data.tags['mlflow.runName'].split('-')[:2])
        lbl, mrkr, clr = acq_model2label_marker_color[acq_model]
        al_iters = int(runs[0].data.params['al_iters'])
        ACC = np.zeros(al_iters + 1)
        print(len(runs), setup_name)
        # Average the (init accuracy + per-iteration accuracy) curve over runs.
        for r in runs:
            acc = np.array([r.data.metrics['init_acc']])
            iter_stats = load_uri(os.path.join(r.info.artifact_uri, 'iter_stats.npz'))
            ACC += np.concatenate((acc, iter_stats['iter_acc']))
        ACC /= float(len(runs))
        if first:
            # Labeled-set sizes along the x-axis; computed once, reused for
            # both model families.
            num_init_labeled = len(load_uri(os.path.join(r.info.artifact_uri, 'init_labeled.npy')))
            B = int(runs[0].data.params['B'])
            dom = [num_init_labeled + B * i for i in range(al_iters + 1)]
            first = False
        plt.scatter(dom[::skip], ACC[::skip], marker=mrkr, label=lbl, s=50, c=clr)
        plt.plot(dom[::skip], ACC[::skip], linewidth=1.5, c=clr)
    plt.legend()
    plt.xlabel(r"Number of labeled points, $|\mathcal{L}|$")
    plt.ylabel("Accuracy")
    plt.tight_layout()
    plt.savefig(f'{exp_save_root}/acc-{modelname}.pdf')
    plt.show()
# -
# Scatter the checker3 dataset colored by class, overlaying each run's
# active-learning choices as gold stars.
checkdata = np.load('../data/checker3/X_labels.npz', allow_pickle=True)
X = checkdata['X']
labels = checkdata['labels']
clrs = np.array(X.shape[0] * ['r'])
clrs[labels == 0] = 'b'
clrs[labels == 1] = 'g'
for r in all_runs:
    # Only runs whose name ends in '1' — presumably one trial per setup;
    # TODO confirm the runName suffix convention.
    if r.data.tags['mlflow.runName'][-1] != '1':
        continue
    iter_stats = load_uri(os.path.join(r.info.artifact_uri, 'iter_stats.npz'))
    choices = iter_stats['al_choices'].flatten()
    init_labeled = load_uri(os.path.join(r.info.artifact_uri, 'init_labeled.npy'))
    choices = np.concatenate((init_labeled, choices))
    # Skip runs whose indices exceed the 3000-point checker3 dataset.
    if np.max(choices) > 2999:
        continue
    plt.figure(figsize=(5, 5))
    plt.scatter(X[:, 0], X[:, 1], c=clrs)
    plt.scatter(X[choices, 0], X[choices, 1], marker='*', s=90, c='gold', linewidths=0.6, edgecolors='k')
    plt.xticks([], [])
    plt.yticks([], [])
    plt.savefig('{}/{}.pdf'.format(exp_save_root, r.data.tags['mlflow.runName']))
    plt.show()
# # MNIST
# +
# Fetch all finished MNIST runs and derive the distinct setup names
# (runName minus its trailing per-trial suffix).
exp_name = 'mnist'
exp_save_root = os.path.join(save_root, exp_name)
if not os.path.exists(exp_save_root):
    os.makedirs(exp_save_root)
experiment = client.get_experiment_by_name(exp_name)
query = 'attributes.status = "FINISHED"'
all_runs = client.search_runs(experiment.experiment_id, filter_string=query)
print(len(all_runs))
setup_names = sorted({'-'.join(r.data.tags['mlflow.runName'].split('-')[:-1]) for r in all_runs})
print(setup_names)
# +
# MNIST: averaged accuracy vs. number of labeled points, excluding the
# combos in `not_plot`.
skip = 3
first = True
not_plot = ['rand-mgr', 'uncertainty-mgr']
plt.figure(figsize=(7, 5))
for setup_name in setup_names:
    if '-'.join(setup_name.split('-')[:2]) in not_plot:
        continue

    runs = [r for r in all_runs if setup_name in r.data.tags['mlflow.runName']]
    acq_model = '-'.join(runs[0].data.tags['mlflow.runName'].split('-')[:2])
    lbl, mrkr, clr = acq_model2label_marker_color[acq_model]

    al_iters = int(runs[0].data.params['al_iters'])
    # Initial accuracy plus one entry per AL iteration, averaged over runs.
    ACC = np.zeros(al_iters + 1)

    print(len(runs), setup_name)
    for r in runs:
        acc = np.array([r.data.metrics['init_acc']])
        iter_stats = load_uri(os.path.join(r.info.artifact_uri, 'iter_stats.npz'))
        ACC += np.concatenate((acc, iter_stats['iter_acc']))
    ACC /= float(len(runs))

    if first:
        # Shared x-axis: labeled-set size after each AL iteration.
        num_init_labeled = len(load_uri(os.path.join(r.info.artifact_uri, 'init_labeled.npy')))
        B = int(runs[0].data.params['B'])
        dom = [num_init_labeled + B * i for i in range(al_iters + 1)]
        first = False

    plt.scatter(dom[::skip], ACC[::skip], marker=mrkr, label=lbl, s=50, c=clr)
    plt.plot(dom[::skip], ACC[::skip], linewidth=0.5, c=clr)

plt.legend()
plt.xlabel(r"Number of labeled points, $|\mathcal{L}|$")
plt.ylabel("Accuracy")
plt.tight_layout()
# plt.savefig('{}/acc.pdf'.format(exp_save_root))
plt.show()
# +
# MNIST: one figure per classifier family, with the y-axis clipped to the
# smallest plotted accuracy (`mm`).
skip = 3
first = True
for modelname in ['mgr', 'ce']:
    plt.figure(figsize=(7, 5))
    mm = 1.0  # running minimum of the plotted accuracies, used for ylim
    for setup_name in setup_names:
        if modelname not in setup_name.split('-')[1]:
            continue
        runs = [r for r in all_runs if setup_name in r.data.tags['mlflow.runName']]
        acq_model = '-'.join(runs[0].data.tags['mlflow.runName'].split('-')[:2])
        lbl, mrkr, clr = acq_model2label_marker_color[acq_model]
        al_iters = int(runs[0].data.params['al_iters'])
        ACC = np.zeros(al_iters + 1)
        print(len(runs), setup_name)
        for r in runs:
            acc = np.array([r.data.metrics['init_acc']])
            iter_stats = load_uri(os.path.join(r.info.artifact_uri, 'iter_stats.npz'))
            ACC += np.concatenate((acc, iter_stats['iter_acc']))
        ACC /= float(len(runs))
        if first:
            # Labeled-set sizes; computed once and reused for both families.
            num_init_labeled = len(load_uri(os.path.join(r.info.artifact_uri, 'init_labeled.npy')))
            B = int(runs[0].data.params['B'])
            dom = [num_init_labeled + B * i for i in range(al_iters + 1)]
            first = False
        plt.scatter(dom[::skip], ACC[::skip], marker=mrkr, label=lbl, s=50, c=clr)
        plt.plot(dom[::skip], ACC[::skip], linewidth=1.5, c=clr)
        if min(ACC[::skip]) < mm:
            mm = min(ACC[::skip])
    plt.legend()
    plt.xlabel(r"Number of labeled points, $|\mathcal{L}|$")
    plt.ylabel("Accuracy")
    plt.ylim([mm, 1.0])
    plt.xticks([i * 100 for i in range(6)])
    plt.tight_layout()
    plt.savefig(f'{exp_save_root}/acc-{modelname}2.pdf')
    plt.show()
# -
# # Salinas
# +
# Fetch all finished Salinas runs and derive the distinct setup names
# (runName minus its trailing per-trial suffix).
exp_name = 'salinas'
exp_save_root = os.path.join(save_root, exp_name)
if not os.path.exists(exp_save_root):
    os.makedirs(exp_save_root)
experiment = client.get_experiment_by_name(exp_name)
query = 'attributes.status = "FINISHED"'
all_runs = client.search_runs(experiment.experiment_id, filter_string=query)
print(len(all_runs))
setup_names = sorted({'-'.join(r.data.tags['mlflow.runName'].split('-')[:-1]) for r in all_runs})
print(setup_names)
# +
# Salinas: averaged accuracy vs. number of labeled points.
skip = 3
first = True
not_plot = []  # nothing excluded here (previously: 'rand-mgr', 'uncertainty-mgr')
plt.figure(figsize=(10, 6))
for setup_name in setup_names:
    if '-'.join(setup_name.split('-')[:2]) in not_plot:
        continue

    runs = [r for r in all_runs if setup_name in r.data.tags['mlflow.runName']]
    acq_model = '-'.join(runs[0].data.tags['mlflow.runName'].split('-')[:2])
    lbl, mrkr, clr = acq_model2label_marker_color[acq_model]

    al_iters = int(runs[0].data.params['al_iters'])
    # Initial accuracy plus one entry per AL iteration, averaged over runs.
    ACC = np.zeros(al_iters + 1)

    print(len(runs), setup_name)
    for r in runs:
        acc = np.array([r.data.metrics['init_acc']])
        iter_stats = load_uri(os.path.join(r.info.artifact_uri, 'iter_stats.npz'))
        ACC += np.concatenate((acc, iter_stats['iter_acc']))
    ACC /= float(len(runs))

    if first:
        # Shared x-axis: labeled-set size after each AL iteration.
        num_init_labeled = len(load_uri(os.path.join(r.info.artifact_uri, 'init_labeled.npy')))
        B = int(runs[0].data.params['B'])
        dom = [num_init_labeled + B * i for i in range(al_iters + 1)]
        first = False

    plt.scatter(dom[::skip], ACC[::skip], marker=mrkr, label=lbl, s=50, c=clr)
    plt.plot(dom[::skip], ACC[::skip], linewidth=0.5, c=clr)

plt.legend()
plt.xlabel(r"Number of labeled points, $|\mathcal{L}|$")
plt.ylabel("Accuracy")
plt.tight_layout()
# plt.savefig('{}/acc.pdf'.format(exp_save_root))
plt.show()
# +
# Salinas: one figure per classifier family; y-axis runs from the smallest
# plotted accuracy up to 0.9.
skip = 3
first = True
for modelname in ['mgr', 'ce']:
    plt.figure(figsize=(7, 5))
    mm = 1.0  # running minimum of the plotted accuracies, used for ylim
    for setup_name in setup_names:
        if modelname not in setup_name.split('-')[1]:
            continue
        runs = [r for r in all_runs if setup_name in r.data.tags['mlflow.runName']]
        acq_model = '-'.join(runs[0].data.tags['mlflow.runName'].split('-')[:2])
        lbl, mrkr, clr = acq_model2label_marker_color[acq_model]
        al_iters = int(runs[0].data.params['al_iters'])
        ACC = np.zeros(al_iters + 1)
        print(len(runs), setup_name)
        for r in runs:
            acc = np.array([r.data.metrics['init_acc']])
            iter_stats = load_uri(os.path.join(r.info.artifact_uri, 'iter_stats.npz'))
            ACC += np.concatenate((acc, iter_stats['iter_acc']))
        ACC /= float(len(runs))
        if first:
            # Labeled-set sizes; computed once and reused for both families.
            num_init_labeled = len(load_uri(os.path.join(r.info.artifact_uri, 'init_labeled.npy')))
            B = int(runs[0].data.params['B'])
            dom = [num_init_labeled + B * i for i in range(al_iters + 1)]
            first = False
        plt.scatter(dom[::skip], ACC[::skip], marker=mrkr, label=lbl, s=50, c=clr)
        plt.plot(dom[::skip], ACC[::skip], linewidth=1.5, c=clr)
        if min(ACC[::skip]) < mm:
            mm = min(ACC[::skip])
    plt.legend()
    plt.xlabel(r"Number of labeled points, $|\mathcal{L}|$")
    plt.ylabel("Accuracy")
    plt.ylim([mm, 0.9])
    plt.xticks([i * 100 for i in range(6)])
    plt.tight_layout()
    plt.savefig(f'{exp_save_root}/acc-{modelname}2.pdf')
    plt.show()
# -
# # Urban
# +
# Fetch all finished Urban runs and derive the distinct setup names
# (runName minus its trailing per-trial suffix).
exp_name = 'urban'
exp_save_root = os.path.join(save_root, exp_name)
if not os.path.exists(exp_save_root):
    os.makedirs(exp_save_root)
experiment = client.get_experiment_by_name(exp_name)
query = 'attributes.status = "FINISHED"'
all_runs = client.search_runs(experiment.experiment_id, filter_string=query)
print(len(all_runs))
setup_names = sorted({'-'.join(r.data.tags['mlflow.runName'].split('-')[:-1]) for r in all_runs})
print(setup_names)
# +
# Urban: averaged accuracy vs. number of labeled points.
# BUG FIX: the skip condition previously read `... not in plot`, referencing
# an undefined name `plot` (a NameError at runtime) and inverting the filter.
# Every sibling cell uses `in not_plot: continue`, so the intent is to skip
# the setups listed in `not_plot`.
skip = 3
first = True
not_plot = ['mc-mgr', 'mc-ce']
plt.figure(figsize=(7, 5))
for setup_name in setup_names:
    if '-'.join(setup_name.split('-')[:2]) in not_plot:
        continue

    runs = [r for r in all_runs if setup_name in r.data.tags['mlflow.runName']]
    acq_model = '-'.join(runs[0].data.tags['mlflow.runName'].split('-')[:2])
    lbl, mrkr, clr = acq_model2label_marker_color[acq_model]

    al_iters = int(runs[0].data.params['al_iters'])
    # Initial accuracy plus one entry per AL iteration, averaged over runs.
    ACC = np.zeros(al_iters + 1)

    print(len(runs), setup_name)
    for r in runs:
        acc = np.array([r.data.metrics['init_acc']])
        iter_stats = load_uri(os.path.join(r.info.artifact_uri, 'iter_stats.npz'))
        ACC += np.concatenate((acc, iter_stats['iter_acc']))
    ACC /= float(len(runs))

    if first:
        # Shared x-axis: labeled-set size after each AL iteration.
        num_init_labeled = len(load_uri(os.path.join(r.info.artifact_uri, 'init_labeled.npy')))
        B = int(runs[0].data.params['B'])
        dom = [num_init_labeled + B * i for i in range(al_iters + 1)]
        first = False

    plt.scatter(dom[::skip], ACC[::skip], marker=mrkr, label=lbl, s=50, c=clr)
    plt.plot(dom[::skip], ACC[::skip], linewidth=0.5, c=clr)

plt.legend()
plt.xlabel(r"Number of labeled points, $|\mathcal{L}|$")
plt.ylabel("Accuracy")
plt.tight_layout()
# plt.savefig('{}/acc.pdf'.format(exp_save_root))
plt.show()
# +
# Urban: one figure per classifier family, y-axis clipped at the smallest
# plotted accuracy.
skip = 3
first = True
for modelname in ['mgr', 'ce']:
    plt.figure(figsize=(7, 5))
    mm = 1.0  # running minimum of the plotted accuracies, used for ylim
    for setup_name in setup_names:
        if modelname not in setup_name.split('-')[1]:
            continue
        runs = [r for r in all_runs if setup_name in r.data.tags['mlflow.runName']]
        acq_model = '-'.join(runs[0].data.tags['mlflow.runName'].split('-')[:2])
        lbl, mrkr, clr = acq_model2label_marker_color[acq_model]
        al_iters = int(runs[0].data.params['al_iters'])
        ACC = np.zeros(al_iters + 1)
        print(len(runs), setup_name)
        for r in runs:
            acc = np.array([r.data.metrics['init_acc']])
            iter_stats = load_uri(os.path.join(r.info.artifact_uri, 'iter_stats.npz'))
            ACC += np.concatenate((acc, iter_stats['iter_acc']))
        ACC /= float(len(runs))
        if first:
            # Labeled-set sizes; computed once and reused for both families.
            num_init_labeled = len(load_uri(os.path.join(r.info.artifact_uri, 'init_labeled.npy')))
            B = int(runs[0].data.params['B'])
            dom = [num_init_labeled + B * i for i in range(al_iters + 1)]
            first = False
        plt.scatter(dom[::skip], ACC[::skip], marker=mrkr, label=lbl, s=50, c=clr)
        plt.plot(dom[::skip], ACC[::skip], linewidth=1.5, c=clr)
        if min(ACC[::skip]) < mm:
            mm = min(ACC[::skip])
    plt.legend()
    plt.xlabel(r"Number of labeled points, $|\mathcal{L}|$")
    plt.ylabel("Accuracy")
    plt.ylim([mm, 1.0])
    plt.xticks([i * 100 for i in range(6)])
    plt.tight_layout()
    plt.savefig(f'{exp_save_root}/acc-{modelname}2.pdf')
    plt.show()
# -
# # Timing
# Collect average per-iteration AL query times from the old tracking store,
# across checkerboard datasets of increasing size.
from collections import defaultdict
mlflow.set_tracking_uri('../mlruns-old')
client = mlflow.tracking.MlflowClient()
# +
TIMES = defaultdict(list)
sizes = [2000, 5000, 10000, 20000]
for i, exp_name in enumerate(['checker2', '5K-checker2', '10K-c2', '20K-c2']):
    experiment = client.get_experiment_by_name(exp_name)
    query = 'attributes.status = "FINISHED"'
    # NOTE(review): `B` is carried over from an earlier cell here — confirm it
    # still holds the intended batch size when this cell runs.
    query += ' and params.B = "{}"'.format(B)
    all_runs = client.search_runs(experiment.experiment_id, filter_string=query)
    setup_names = sorted({'-'.join(r.data.tags['mlflow.runName'].split('-')[:-1]) for r in all_runs})
    print(setup_names)

    for setup_name in setup_names:
        # Baselines have negligible query time; skip them.
        if 'rand' in setup_name or 'uncertainty' in setup_name:
            continue
        runs = [r for r in all_runs
                if setup_name in r.data.tags['mlflow.runName'] and r.data.tags['mlflow.runName'][-1] == '0']
        print(len(runs))
        for r in runs:
            # Redirect the recorded artifact URI into the "-old" directory.
            # NOTE(review): index 7 is a hard-coded path component — fragile;
            # verify against the actual artifact URI layout.
            a_uri_split = str(r.info.artifact_uri).split('/')
            a_uri_split[7] += '-old'
            a_uri = '/'.join(a_uri_split)
            iter_stats = load_uri(os.path.join(a_uri, 'iter_stats.npz'))
            choices = iter_stats['al_choices'].flatten()
            init_labeled = load_uri(os.path.join(a_uri, 'init_labeled.npy'))
            choices = np.concatenate((init_labeled, choices))
            k = '-'.join(r.data.tags['mlflow.runName'].split('-')[:2])
            times = iter_stats['iter_time']
            # Record (dataset size in thousands, mean iteration time) for the
            # first acceptable run of this setup, then move on.
            if i == 0:
                # 2K dataset: only accept runs whose choices index within it.
                if np.max(choices) < 2000:
                    TIMES[k].append((2, np.average(times)))
                    break
            else:
                TIMES[k].append((sizes[i] // 1000, np.average(times)))
                break

for k in TIMES:
    print(k)
    print(TIMES[k])
# +
# Collect average per-iteration AL query times from the current tracking
# store, one entry per experiment.
TIMESMC = defaultdict(list)
mlflow.set_tracking_uri('../mlruns')
client = mlflow.tracking.MlflowClient()
B = 5
for exp_name in ['checker3', 'salinas', 'mnist', 'urban']:
    exp_save_root = os.path.join(save_root, exp_name)
    if not os.path.exists(exp_save_root):
        os.makedirs(exp_save_root)
    experiment = client.get_experiment_by_name(exp_name)
    query = 'attributes.status = "FINISHED"'
    print(query)
    all_runs = client.search_runs(experiment.experiment_id, filter_string=query)
    setup_names = sorted({'-'.join(r.data.tags['mlflow.runName'].split('-')[:-1]) for r in all_runs})
    print(setup_names)
    for setup_name in setup_names:
        # Baselines have negligible query time; skip them.
        if 'rand' in setup_name or 'uncertainty' in setup_name:
            continue
        runs = [r for r in all_runs
                if setup_name in r.data.tags['mlflow.runName'] and r.data.tags['mlflow.runName'][-1] == '0']

        # Use the first matching run's stats as representative for this setup.
        iter_stats = load_uri(os.path.join(runs[0].info.artifact_uri, 'iter_stats.npz'))
        choices = iter_stats['al_choices'].flatten()
        init_labeled = load_uri(os.path.join(runs[0].info.artifact_uri, 'init_labeled.npy'))
        choices = np.concatenate((init_labeled, choices))
        k = '-'.join(runs[0].data.tags['mlflow.runName'].split('-')[:2])
        times = iter_stats['iter_time']
        TIMESMC[k].append((exp_name, np.average(times)))

for k in TIMESMC:
    print(k)
    print(TIMESMC[k])
# +
# Log-log plot of average AL query time vs. dataset size: solid lines for the
# checkerboard scaling study (TIMES), dashed lines for the per-experiment
# measurements (TIMESMC).
name2size = {
    'checker3': 3000,
    'salinas': 7148,
    'mnist': 70000,
    'urban': 94129,
}
fig, ax = plt.subplots(1, 1, figsize=(8, 5))
lines = []
Names = []
for k in TIMES:
    if 'sopt' in k:
        continue
    digs, times = zip(*TIMES[k])
    acq_model = '-'.join(k.split("-")[:2]).lower()
    lbl, mrkr, clr = acq_model2label_marker_color[acq_model]
    # digs are dataset sizes in thousands; rescale to absolute point counts.
    p1, = ax.loglog(1000 * np.array(digs), times, c=clr)
    p2 = ax.scatter(1000 * np.array(digs), times, marker=mrkr, c=clr, s=50)
    lines.append((p1, p2))
    Names.append(lbl)
for k in TIMESMC:
    if 'sopt' in k:
        continue
    names, times = zip(*TIMESMC[k])
    acq_model = '-'.join(k.split("-")[:2]).lower()
    lbl, mrkr, clr = acq_model2label_marker_color[acq_model]
    numbers = np.array([name2size[name] for name in names])
    p1, = ax.loglog(numbers, times, '--', c=clr)
    p2 = ax.scatter(numbers, times, marker=mrkr, c=clr, s=50)
    lines.append((p1, p2))
    Names.append(lbl)
print(lines)
print(Names)
# Legend entries are (line, marker) tuples so each label shows both artists.
ax.legend(lines, Names, bbox_to_anchor=(1.01, 1.0))
ax.set_xlabel("Size of Dataset, $N$")
ax.set_ylabel("Avg. AL Query Time")
# NOTE(review): savefig happens before tight_layout — presumably intentional
# together with bbox_inches="tight"; confirm the saved figure looks right.
plt.savefig(os.path.join(save_root, "timing.pdf"), bbox_inches="tight")
plt.tight_layout()
plt.show()
# -
# Scratch calculations (notebook cell outputs).
70000. / 20.
20. / 70000.
|