# 7-3-NaiveBayesian-detect-abnormal-operation.py

import pydotplus
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.naive_bayes import GaussianNB

from datasets import Datasets
  6. # 特征提取,使用词集将操作命令向量化
  7. def get_feature(cmd, fdist):
  8. feature = []
  9. for block in cmd:
  10. v = [0] * len(fdist)
  11. for i in range(0, len(fdist)):
  12. if fdist[i] in block:
  13. v[i] += 1
  14. feature.append(v)
  15. return feature
  16. def main():
  17. data, y, fdist = Datasets.load_Schonlau('User3')
  18. # 特征提取
  19. x = get_feature(data, fdist)
  20. x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
  21. # 朴素贝叶斯
  22. gnb = GaussianNB()
  23. gnb.fit(x_train, y_train)
  24. print(gnb.score(x_test, y_test)) # 1.0
  25. # 交叉验证
  26. scores = cross_val_score(gnb, x, y, cv=10, scoring="accuracy")
  27. print(scores.mean()) # 0.9933333333333334
  28. if __name__ == "__main__":
  29. main()