5-4-KNN-detect-abnormal-operation.py

import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, cross_val_score
import matplotlib.pyplot as plt
from datasets import Datasets


# Feature extraction: vectorize the operation commands with a set-of-words model,
# so abnormal operations can be judged from the command word-set statistics.
def get_feature(cmd, fdist):
    feature = []
    for block in cmd:
        v = [0] * len(fdist)
        for i in range(0, len(fdist)):
            if fdist[i] in block:
                v[i] += 1
        feature.append(v)
    return feature
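

# A minimal sketch of how get_feature behaves (hypothetical commands, not taken
# from the Schonlau data; each block is assumed to be a list of command strings):
# every command block is mapped to a 0/1 presence vector over the word set fdist,
# since each word of fdist is checked only once per block.
#
#   fdist = ['ls', 'ps', 'netscape']
#   cmd = [['ls', 'ps', 'ls'], ['netscape']]
#   get_feature(cmd, fdist)  # -> [[1, 1, 0], [0, 0, 1]]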


def main():
    data, y, fdist = Datasets.load_Schonlau('User3')
    x = get_feature(data, fdist)
    # Split into training and test data (the commented lines below use a fixed
    # 100/50 split; train_test_split draws a random 70/30 split instead)
    # x_train, y_train = x[0:100], y[0:100]
    # x_test, y_test = x[100:150], y[100:150]
    # print(x_test, y_test)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
    # Train the KNN classifier
    knn = KNeighborsClassifier(n_neighbors=3)
    # knn.fit(x_train, y_train)
    # # Check the model score on the held-out test set
    # print(knn.score(x_test, y_test))
    # 10-fold cross-validation
    scores = cross_val_score(knn, x, y, cv=10, scoring="accuracy")
    print(scores.mean())  # 0.9733333333333334
    # # Determine the value of k
    # k_range = range(1, 30)
    # k_scores = []
    # for k in k_range:
    #     knn = KNeighborsClassifier(n_neighbors=k)
    #     scores = cross_val_score(knn, x, y, cv=10, scoring="accuracy")
    #     k_scores.append(scores.mean())
    #
    # plt.plot(k_range, k_scores)
    # plt.xlabel("Value of K for KNN")
    # plt.ylabel("Cross Validated Accuracy")
    # plt.show()
    # # From the plot, k=3 gives the best model
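    # An alternative sketch of the same k search using scikit-learn's
    # GridSearchCV (would need "from sklearn.model_selection import GridSearchCV");
    # kept commented out like the manual loop above:
    # grid = GridSearchCV(KNeighborsClassifier(),
    #                     param_grid={"n_neighbors": list(range(1, 30))},
    #                     cv=10, scoring="accuracy")
    # grid.fit(x, y)
    # print(grid.best_params_, grid.best_score_)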


if __name__ == "__main__":
    main()
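

# A minimal usage sketch (hypothetical, and it assumes the label convention
# returned by Datasets.load_Schonlau, e.g. y == 1 for abnormal blocks): after
# knn.fit(x_train, y_train), a new command block could be flagged with
#   new_x = get_feature([new_block], fdist)
#   knn.predict(new_x)        # predicted label for the block
#   knn.predict_proba(new_x)  # neighbour-vote probabilities per class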