123456789101112131415161718192021222324252627282930 |
- from datasets import Datasets
- from sklearn.linear_model import LogisticRegression
- from sklearn.feature_extraction.text import CountVectorizer
- from sklearn.model_selection import train_test_split, cross_val_score
def main():
    """Train and evaluate a bag-of-words logistic regression on ADFA-LD data.

    Loads the normal samples and the attack samples selected by the
    Java Meterpreter pattern via ``Datasets``, combines them, vectorizes
    the combined samples with ``CountVectorizer``, and then prints two
    numbers: the score on a 30% hold-out split, followed by the mean
    accuracy of a 10-fold cross-validation over the full feature matrix.

    Nothing is returned; results are written to stdout only.
    """
    # Load the ADFA-LD data (normal samples plus the selected attack samples).
    normal_samples, normal_labels = Datasets.load_adfa_normal()
    attack_samples, attack_labels = Datasets.load_adfa_attack(
        r"Java_Meterpreter_\d+/UAD-Java-Meterpreter*"
    )
    # NOTE(review): `+` presumably concatenates Python lists here -- confirm
    # against the Datasets loaders.
    samples = normal_samples + attack_samples
    labels = normal_labels + attack_labels

    # Bag-of-words features, converted to a dense array.
    vectorizer = CountVectorizer(min_df=1)
    features = vectorizer.fit_transform(samples).toarray()

    # Hold out 30% of the samples for testing; no random_state is passed to
    # the split.
    split = train_test_split(features, labels, test_size=.3)
    train_features, test_features, train_labels, test_labels = split

    # Fit logistic regression and report the hold-out score.
    classifier = LogisticRegression(solver='lbfgs', max_iter=2000)
    classifier.fit(train_features, train_labels)
    holdout_score = classifier.score(test_features, test_labels)
    print(holdout_score)  # the original author's run printed 0.9340277777777778

    # 10-fold cross-validated accuracy over the full feature matrix.
    fold_scores = cross_val_score(
        classifier, features, labels, cv=10, scoring="accuracy"
    )
    print(fold_scores.mean())  # the original author's run printed 0.9498574561403508
# Run the experiment only when this file is executed as a script, so that
# importing the module does not trigger data loading or training.
if __name__ == "__main__":
    main()
|