12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061 |
- import numpy as np
- from nltk import FreqDist
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.model_selection import train_test_split, cross_val_score
- import matplotlib.pyplot as plt
- from datasets import Datasets
def get_feature(cmd, fdist):
    """Build a three-value feature vector for every command block.

    For each block the vector is ``[f1, f2, f3]``:

    * ``f1`` -- number of distinct commands in the block.
    * ``f2`` -- how many of the block's 10 most frequent commands are among
      the first 50 entries of ``fdist``.
    * ``f3`` -- how many of the block's 10 least frequent commands are among
      the last 50 entries of ``fdist``.

    Args:
        cmd: Iterable of command blocks; each block is an iterable of
            hashable command names.
        fdist: Sequence of command names for the whole data set.
            NOTE(review): assumed to be ordered most-frequent first --
            confirm against the loader that produces it.

    Returns:
        A list with one ``[f1, f2, f3]`` list per block, in input order.
    """
    # Local import keeps the block self-contained. NLTK 3's FreqDist is a
    # Counter subclass, so Counter yields the same counts and ordering.
    from collections import Counter

    # Materialise once so slicing works for any sequence-like input.
    global_rank = list(fdist)
    max_cmd = set(global_rank[:50])
    min_cmd = set(global_rank[-50:])

    feature = []
    for block in cmd:
        # Rank by descending frequency explicitly: plain ``.keys()`` follows
        # insertion order, so slicing it would pick the first/last commands
        # seen rather than the most/least frequent ones.
        ranked = [name for name, _ in Counter(block).most_common()]
        distinct = len(ranked)
        top_overlap = len(max_cmd.intersection(ranked[:10]))
        bottom_overlap = len(min_cmd.intersection(ranked[-10:]))
        feature.append([distinct, top_overlap, bottom_overlap])
    return feature
def main():
    """Plot cross-validated KNN accuracy against K for the 'User3' data.

    Loads the command blocks, labels and command ranking through
    ``Datasets.load_Schonlau``, turns the blocks into feature vectors with
    ``get_feature``, then scores a ``KNeighborsClassifier`` for every K in
    1..29 using 10-fold cross-validation and shows the mean accuracy curve.
    """
    data, y, fdist = Datasets.load_Schonlau('User3')
    x = get_feature(data, fdist)

    # Cross-validation already partitions the full data set, so no separate
    # hold-out split is needed here (the previous one was never used).
    k_range = range(1, 30)
    k_scores = [
        cross_val_score(
            KNeighborsClassifier(n_neighbors=k),
            x,
            y,
            cv=10,
            scoring="accuracy",
        ).mean()
        for k in k_range
    ]

    plt.plot(k_range, k_scores)
    plt.xlabel("Value of K for KNN")
    plt.ylabel("Cross Validated Accuracy")
    plt.show()
-
if __name__ == "__main__":
    # Run the experiment only when executed as a script, so importing this
    # module does not trigger data loading or open a plot window.
    main()
|