From d5be1d2cabbde72148f387dda95c5a7629df449f Mon Sep 17 00:00:00 2001
From: JingyuanLiu
Date: Mon, 19 Mar 2018 12:04:48 +0800
Subject: [PATCH] update

---
 CNN-sentence_classification/dataset.py |  6 +++---
 CNN-sentence_classification/model.py   | 20 +-------------------
 CNN-sentence_classification/train.py   | 18 ++++++++++--------
 3 files changed, 14 insertions(+), 30 deletions(-)

diff --git a/CNN-sentence_classification/dataset.py b/CNN-sentence_classification/dataset.py
index b4ce1115..5a8fe2b7 100644
--- a/CNN-sentence_classification/dataset.py
+++ b/CNN-sentence_classification/dataset.py
@@ -83,9 +83,9 @@ class MRDataset(Dataset):
 
         embedding_weights = np.random.uniform(-0.25, 0.25, (len(self.word2id_dict), 300))
         for word in word_dict:
-			word_id = word_dict[word]
-			if word in model.wv.vocab:
-				embedding_weights[word_id, :] = model[word]
+            word_id = word_dict[word]
+            if word in model.wv.vocab:
+                embedding_weights[word_id, :] = model[word]
 
         return embedding_weights
 
diff --git a/CNN-sentence_classification/model.py b/CNN-sentence_classification/model.py
index cfb5154b..be0098f4 100644
--- a/CNN-sentence_classification/model.py
+++ b/CNN-sentence_classification/model.py
@@ -5,28 +5,10 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from torch.autograd import Variable
-from torch.utils.data import DataLoader, TensorDataset
 
 import dataset
 
-"""
-#some information
-mode = "static"
-use_pretrained_embedding = "gensim.word2vec"
-print('MODE = {}'.format(mode))
-print('EMBEDDING = {}\n'.format(use_pretrained_embeddings)
-
-embedding_weights = dataset.word_embedding_300()
-embed_num = len(embedding_weights)
-embed_dim = 300
-class_num = 2
-len_sentence = 64
-
-print('embedding size = {}'.format(embed_num))
-print('embedding dimension = {}'.format(embed_dim))
-print('sentence len n = {}'.format(len_sentence))
-print('num of classes = {}'.format(class_num))
-"""
+
 
 class CNN_text(nn.Module):
     def __init__(self, kernel_h=[3,4,5], kernel_num=100, embed_num=1000, embed_dim=300, dropout=0.5, L2_constrain=3, batchsize=50, pretrained_embeddings=None):
diff --git a/CNN-sentence_classification/train.py b/CNN-sentence_classification/train.py
index e2e0b8e6..8b3801d2 100644
--- a/CNN-sentence_classification/train.py
+++ b/CNN-sentence_classification/train.py
@@ -40,7 +40,7 @@
 test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                           batch_size=batch_size,
                                           shuffle=False)
-
+#cnn
 cnn = CNN_text(embed_num=len(dataset.word2id()), pretrained_embeddings=dataset.word_embeddings())
 
 if cuda:
@@ -51,6 +51,8 @@ if cuda:
 
 criterion = nn.CrossEntropyLoss()
 optimizer = torch.optim.Adam(cnn.parameters(), lr=learning_rate)
+
+#train and test
 best_acc = None
 
 for epoch in range(num_epochs):
@@ -59,9 +61,9 @@ for epoch in range(num_epochs):
     for i, (sents,labels) in enumerate(train_loader):
         sents = Variable(sents)
         labels = Variable(labels)
-		if cuda:
-			sents = sents.cuda()
-			labels = labels.cuda()
+        if cuda:
+            sents = sents.cuda()
+            labels = labels.cuda()
         optimizer.zero_grad()
         outputs = cnn(sents)
         loss = criterion(outputs, labels)
@@ -78,8 +80,8 @@ for epoch in range(num_epochs):
         total = 0
         for sents, labels in test_loader:
             sents = Variable(sents)
-			if cuda:
-				sents = sents.cuda()
+            if cuda:
+                sents = sents.cuda()
                 labels = labels.cuda()
             outputs = cnn(sents)
             _, predicted = torch.max(outputs.data, 1)
@@ -90,8 +92,8 @@ for epoch in range(num_epochs):
 
         if best_acc is None or acc > best_acc:
             best_acc = acc
-			if os.path.exists("models") is False:
-				os.makedirs("models")
+            if os.path.exists("models") is False:
+                os.makedirs("models")
             torch.save(cnn.state_dict(), 'models/cnn.pkl')
         else:
             learning_rate = learning_rate * 0.8
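Note on the dataset.py hunk: the reindented block is the usual word2vec warm-start for an embedding matrix. Every row starts as uniform noise in [-0.25, 0.25], and rows for words the pretrained model knows are overwritten with their 300-d vectors. A minimal standalone sketch of that pattern, assuming the gensim < 4.0 API (model.wv.vocab) that the patched code itself uses; build_embedding_weights is an illustrative name, not a function in this repo:

    import numpy as np

    def build_embedding_weights(word2id, model, dim=300):
        # Rows default to uniform noise in [-0.25, 0.25] so that words
        # missing from the pretrained model still get distinct vectors.
        weights = np.random.uniform(-0.25, 0.25, (len(word2id), dim))
        for word, word_id in word2id.items():
            if word in model.wv.vocab:  # gensim < 4.0 vocabulary lookup
                weights[word_id, :] = model[word]  # copy the pretrained vector
        return weights

The resulting matrix is what train.py hands to CNN_text as pretrained_embeddings.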
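Note on the train.py tail: when test accuracy improves, the script creates models/ if needed and checkpoints to models/cnn.pkl; otherwise it multiplies learning_rate by 0.8. One caveat worth making explicit: reassigning the Python variable learning_rate does not by itself change an Adam optimizer constructed earlier, so unless the optimizer is rebuilt each epoch (not visible in these hunks), the decay only takes effect if it is written into optimizer.param_groups. A hedged sketch of that pattern under that assumption; checkpoint_or_decay is an illustrative name, not repo code:

    import os
    import torch

    def checkpoint_or_decay(cnn, optimizer, acc, best_acc, decay=0.8):
        if best_acc is None or acc > best_acc:
            best_acc = acc
            os.makedirs("models", exist_ok=True)  # idempotent form of the exists() check
            torch.save(cnn.state_dict(), "models/cnn.pkl")
        else:
            for group in optimizer.param_groups:  # write the decayed LR into the optimizer
                group["lr"] *= decay
        return best_acc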