123456789101112131415161718192021222324252627282930313233343536 |
- import numpy as np
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.model_selection import train_test_split, cross_val_score
- from datasets import Datasets
- import matplotlib.pyplot as plt
- import pandas as pd
- def get_rootkit_and_normal(kdd99_data):
- x = []
- y = []
- for data in kdd99_data:
- if (data[41] in ["normal.", "rootkit."]) and (data[2] == "telnet"):
- # 提取结果
- y.append(1 if data[41] == "rootkit." else 0)
- # 提取特征
- x.append(list(map(lambda i: float(i), data[9:21])))
- return x, y
- def main():
- kdd99_data = Datasets.load_kdd99()
- x, y = get_rootkit_and_normal(kdd99_data)
- # x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
- knn = KNeighborsClassifier(n_neighbors=3)
- # knn.fit(x_train, y_train)
- # print(knn.score(x_test, y_test))
- scores = cross_val_score(knn, x, y, cv=2, scoring='accuracy')
- print(scores.mean()) # 0.9777777777777777
- if __name__ == "__main__":
- main()
|