# 8-3-LogisticRegression-detect-java-buffer-overflow-attack.py (917 B)

  1. from datasets import Datasets
  2. from sklearn.linear_model import LogisticRegression
  3. from sklearn.feature_extraction.text import CountVectorizer
  4. from sklearn.model_selection import train_test_split, cross_val_score
  5. def main():
  6. # 加载ADFA-LD 数据
  7. x1, y1 = Datasets.load_adfa_normal()
  8. x2, y2 = Datasets.load_adfa_attack(r"Java_Meterpreter_\d+/UAD-Java-Meterpreter*")
  9. x = x1 + x2
  10. y = y1 + y2
  11. # 词袋特征
  12. cv = CountVectorizer(min_df=1)
  13. x = cv.fit_transform(x).toarray()
  14. x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.3)
  15. # 逻辑回归训练并预测
  16. lr = LogisticRegression(solver='lbfgs', max_iter=2000)
  17. lr.fit(x_train, y_train)
  18. print(lr.score(x_test, y_test)) # 0.9340277777777778
  19. scores = cross_val_score(lr, x, y, cv=10, scoring="accuracy")
  20. print(scores.mean()) # 0.9498574561403508
  21. if __name__ == "__main__":
  22. main()