Browse Source

update

tags/v0.1.0
JingyuanLiu 6 years ago
parent
commit
d5be1d2cab
3 changed files with 14 additions and 30 deletions
  1. +3
    -3
      CNN-sentence_classification/dataset.py
  2. +1
    -19
      CNN-sentence_classification/model.py
  3. +10
    -8
      CNN-sentence_classification/train.py

+ 3
- 3
CNN-sentence_classification/dataset.py View File

@@ -83,9 +83,9 @@ class MRDataset(Dataset):
embedding_weights = np.random.uniform(-0.25, 0.25, (len(self.word2id_dict), 300)) embedding_weights = np.random.uniform(-0.25, 0.25, (len(self.word2id_dict), 300))


for word in word_dict: for word in word_dict:
word_id = word_dict[word]
if word in model.wv.vocab:
embedding_weights[word_id, :] = model[word]
word_id = word_dict[word]
if word in model.wv.vocab:
embedding_weights[word_id, :] = model[word]


return embedding_weights return embedding_weights




+ 1
- 19
CNN-sentence_classification/model.py View File

@@ -5,28 +5,10 @@ import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from torch.autograd import Variable from torch.autograd import Variable
from torch.utils.data import DataLoader, TensorDataset


import dataset import dataset


"""
#some information
mode = "static"
use_pretrained_embedding = "gensim.word2vec"
print('MODE = {}'.format(mode))
print('EMBEDDING = {}\n'.format(use_pretrained_embeddings)

embedding_weights = dataset.word_embedding_300()
embed_num = len(embedding_weights)
embed_dim = 300
class_num = 2
len_sentence = 64

print('embedding size = {}'.format(embed_num))
print('embedding dimension = {}'.format(embed_dim))
print('sentence len n = {}'.format(len_sentence))
print('num of classes = {}'.format(class_num))
"""



class CNN_text(nn.Module): class CNN_text(nn.Module):
def __init__(self, kernel_h=[3,4,5], kernel_num=100, embed_num=1000, embed_dim=300, dropout=0.5, L2_constrain=3, batchsize=50, pretrained_embeddings=None): def __init__(self, kernel_h=[3,4,5], kernel_num=100, embed_num=1000, embed_dim=300, dropout=0.5, L2_constrain=3, batchsize=50, pretrained_embeddings=None):


+ 10
- 8
CNN-sentence_classification/train.py View File

@@ -40,7 +40,7 @@ test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
batch_size=batch_size, batch_size=batch_size,
shuffle=False) shuffle=False)
#cnn


cnn = CNN_text(embed_num=len(dataset.word2id()), pretrained_embeddings=dataset.word_embeddings()) cnn = CNN_text(embed_num=len(dataset.word2id()), pretrained_embeddings=dataset.word_embeddings())
if cuda: if cuda:
@@ -51,6 +51,8 @@ if cuda:
criterion = nn.CrossEntropyLoss() criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn.parameters(), lr=learning_rate) optimizer = torch.optim.Adam(cnn.parameters(), lr=learning_rate)



#train and test
best_acc = None best_acc = None


for epoch in range(num_epochs): for epoch in range(num_epochs):
@@ -59,9 +61,9 @@ for epoch in range(num_epochs):
for i, (sents,labels) in enumerate(train_loader): for i, (sents,labels) in enumerate(train_loader):
sents = Variable(sents) sents = Variable(sents)
labels = Variable(labels) labels = Variable(labels)
if cuda:
sents = sents.cuda()
labels = labels.cuda()
if cuda:
sents = sents.cuda()
labels = labels.cuda()
optimizer.zero_grad() optimizer.zero_grad()
outputs = cnn(sents) outputs = cnn(sents)
loss = criterion(outputs, labels) loss = criterion(outputs, labels)
@@ -78,8 +80,8 @@ for epoch in range(num_epochs):
total = 0 total = 0
for sents, labels in test_loader: for sents, labels in test_loader:
sents = Variable(sents) sents = Variable(sents)
if cuda:
sents = sents.cuda()
if cuda:
sents = sents.cuda()
labels = labels.cuda() labels = labels.cuda()
outputs = cnn(sents) outputs = cnn(sents)
_, predicted = torch.max(outputs.data, 1) _, predicted = torch.max(outputs.data, 1)
@@ -90,8 +92,8 @@ for epoch in range(num_epochs):
if best_acc is None or acc > best_acc: if best_acc is None or acc > best_acc:
best_acc = acc best_acc = acc
if os.path.exists("models") is False:
os.makedirs("models")
if os.path.exists("models") is False:
os.makedirs("models")
torch.save(cnn.state_dict(), 'models/cnn.pkl') torch.save(cnn.state_dict(), 'models/cnn.pkl')
else: else:
learning_rate = learning_rate * 0.8 learning_rate = learning_rate * 0.8


Loading…
Cancel
Save