14-4-NN-detect-java-buffer-overflow-attack.py 1.4 KB

12345678910111213141516171819202122232425262728293031323334
  1. from sklearn.neural_network import MLPClassifier
  2. from datasets import Datasets
  3. from sklearn.model_selection import cross_val_score
  4. from sklearn.feature_extraction.text import CountVectorizer
  5. def main():
  6. # 加载ADFA-LD 数据
  7. x1, y1 = Datasets.load_adfa_normal()
  8. x2, y2 = Datasets.load_adfa_attack(r"Java_Meterpreter_\d+/UAD-Java-Meterpreter*")
  9. x = x1 + x2
  10. y = y1 + y2
  11. # 词袋特征
  12. cv = CountVectorizer(min_df=1)
  13. x = cv.fit_transform(x).toarray()
  14. mlp = MLPClassifier(hidden_layer_sizes=(150, 50), max_iter=10000, alpha=1e-4, solver='sgd', tol=1e-4,
  15. random_state=1, learning_rate_init=.01)
  16. # hidden_layer_sizes:第i个元素表示第i个隐藏层中的神经元数量。
  17. # slover:{‘lbfgs’,‘sgd’,‘adam’},默认’adam’。权重优化的求解器:'lbfgs’是准牛顿方法族的优化器;'sgd’指的是随机梯度下降。'adam’是指由Kingma
  18. # alpha:L2惩罚(正则化项)参数
  19. # random_state:默认无随机数生成器的状态或种子
  20. # max_iter:最大迭代次数
  21. # verbose:是否将进度消息打印到stdout
  22. # 优化的容忍度,容差优化
  23. # learning_rate_init:初始学习率
  24. scores = cross_val_score(mlp, x, y, cv=10, scoring="accuracy")
  25. print(scores.mean()) # 0.9654934210526316
  26. if __name__ == "__main__":
  27. main()