# 16-3-NaiveBayesian-recognise-malicious-comments.py

import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

from datasets import Datasets
  6. def main():
  7. # 导入数据
  8. x, y = Datasets.load_movie_review()
  9. x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.3)
  10. # 数据预处理,词袋
  11. tokenizer = tf.keras.preprocessing.text.Tokenizer()
  12. tokenizer.fit_on_texts(x)
  13. x_train = tokenizer.texts_to_sequences(x_train)
  14. x_test = tokenizer.texts_to_sequences(x_test)
  15. # 序列编码one-hot
  16. x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=200)
  17. x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test, maxlen=200)
  18. gnb = GaussianNB()
  19. gnb.fit(x_train, y_train)
  20. print(gnb.score(x_test, y_test)) # 0.5766666666666667
  21. if __name__ == "__main__":
  22. main()