12345678910111213141516171819202122232425262728 |
- import numpy as np
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.model_selection import train_test_split, cross_val_score
- from sklearn.feature_extraction.text import CountVectorizer
- from datasets import Datasets
- import matplotlib.pyplot as plt
- import pandas as pd
- def main():
- # 加载ADFA-LD 数据
- x1, y1 = Datasets.load_adfa_normal()
- x2, y2 = Datasets.load_adfa_attack(r"Web_Shell_\d+/UAD-W*")
- x = x1 + x2
- y = y1 + y2
- # 词袋特征
- cv = CountVectorizer()
- x = cv.fit_transform(x).toarray()
- knn = KNeighborsClassifier(n_neighbors=3)
- scores = cross_val_score(knn, x, y, cv=10, scoring='accuracy')
- print(scores.mean()) # 0.9663706140350878
- if __name__ == "__main__":
- main()
|