12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970 |
- import tensorflow as tf
- import matplotlib.pyplot as plt
- from datasets import Datasets
- from sklearn.model_selection import train_test_split
def main():
    """Train a small CNN spam classifier on the Enron1 email dataset.

    Loads texts and binary labels, tokenizes and pads them to a fixed
    length, trains a two-layer Conv1D model for 5 epochs, then plots
    training vs. validation accuracy.
    """
    x, y = Datasets.load_enron1()
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

    # Fit the tokenizer on the TRAINING texts only — fitting on the full
    # corpus (as the original did) leaks test-set vocabulary into training.
    # oov_token maps words unseen at fit time to a placeholder instead of
    # silently dropping them from the test sequences.
    tokenizer = tf.keras.preprocessing.text.Tokenizer(oov_token="<OOV>")
    tokenizer.fit_on_texts(x_train)
    x_train = tokenizer.texts_to_sequences(x_train)
    x_test = tokenizer.texts_to_sequences(x_test)
    num_words = len(tokenizer.word_index)

    # Pad/truncate every email to a fixed token length and one-hot encode
    # the binary labels for categorical crossentropy.
    max_len = 1024
    x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_len)
    x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_len)
    y_train = tf.keras.utils.to_categorical(y_train, num_classes=2)
    y_test = tf.keras.utils.to_categorical(y_test, num_classes=2)

    model = tf.keras.Sequential([
        # input_dim is num_words + 1 because word_index is 1-based:
        # index 0 is reserved for the padding value.
        tf.keras.layers.Embedding(
            input_dim=num_words + 1,
            output_dim=300,
            input_length=max_len,
            trainable=True,
        ),
        tf.keras.layers.Conv1D(filters=128, kernel_size=3, padding="valid", activation="relu"),
        tf.keras.layers.Conv1D(filters=128, kernel_size=4, padding="valid", activation="relu"),
        tf.keras.layers.MaxPool1D(pool_size=2),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Flatten(),
        # Two-way softmax paired with categorical_crossentropy below.
        tf.keras.layers.Dense(2, activation="softmax"),
    ])

    model.compile(
        optimizer="adam",
        loss="categorical_crossentropy",
        metrics=["acc"],
    )

    history = model.fit(
        x_train, y_train,
        batch_size=32,
        epochs=5,
        validation_data=(x_test, y_test),
    )

    # Plot per-epoch training vs. validation accuracy; the history keys
    # match the "acc" metric name passed to compile().
    plt.plot(history.epoch, history.history.get("acc"), label="train acc")
    plt.plot(history.epoch, history.history.get("val_acc"), label="val acc")
    plt.legend()
    plt.show()
# Run the training pipeline only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
|