1234567891011121314151617181920212223242526272829 |
- from datasets import Datasets
- import tensorflow as tf
- from sklearn.model_selection import train_test_split
- import matplotlib.pyplot as plt
- from sklearn.naive_bayes import GaussianNB
- def main():
- # 导入数据
- x, y = Datasets.load_movie_review()
- x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.3)
- # 数据预处理,词袋
- tokenizer = tf.keras.preprocessing.text.Tokenizer()
- tokenizer.fit_on_texts(x)
- x_train = tokenizer.texts_to_sequences(x_train)
- x_test = tokenizer.texts_to_sequences(x_test)
- # 序列编码one-hot
- x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=200)
- x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test, maxlen=200)
- gnb = GaussianNB()
- gnb.fit(x_train, y_train)
- print(gnb.score(x_test, y_test)) # 0.5766666666666667
- if __name__ == "__main__":
- main()
|