
add batch

tags/v0.1.0
choocewhatulike, 6 years ago
commit 4dfa273be1
2 changed files with 58 additions and 21 deletions:
  1. model_inplement/code/model.py  +57 -20
  2. model_inplement/code/train.py  +1 -1

model_inplement/code/model.py  (+57 -20)

@@ -1,10 +1,23 @@
-import numpy
-
 import torch
 import torch.nn as nn
 from torch.autograd import Variable
 import torch.nn.functional as F


+def pack_sequence(tensor_seq, padding_value=0.0):
+    if len(tensor_seq) <= 0:
+        return
+    length = [v.size(0) for v in tensor_seq]
+    max_len = max(length)
+    size = [len(tensor_seq), max_len]
+    size.extend(list(tensor_seq[0].size()[1:]))
+    ans = torch.Tensor(*size).fill_(padding_value)
+    if tensor_seq[0].data.is_cuda:
+        ans = ans.cuda()
+    ans = Variable(ans)
+    for i, v in enumerate(tensor_seq):
+        ans[i, :length[i], :] = v
+    return ans
+
 class HAN(nn.Module):
     def __init__(self, input_size, output_size,
                  word_hidden_size, word_num_layers, word_context_size,
@@ -23,17 +36,14 @@ class HAN(nn.Module):
         self.softmax = nn.LogSoftmax(dim=1)

     def forward(self, batch_doc):
-        # input is a sequence of vector
-        # if level == w, a seq of words (a sent); level == s, a seq of sents (a doc)
+        # input is a sequence of matrix
         doc_vec_list = []
         for doc in batch_doc:
-            s_list = []
-            for sent in doc:
-                s_list.append(self.word_layer(sent))
-            s_vec = torch.cat(s_list, dim=0)
-            vec = self.sent_layer(s_vec)
-            doc_vec_list.append(vec)
-        doc_vec = torch.cat(doc_vec_list, dim=0)
+            # doc's dim (num_sent, seq_len, word_dim)
+            sent_mat = self.word_layer(doc)
+            # sent_mat's dim (num_sent, vec_dim)
+            doc_vec_list.append(sent_mat)
+        doc_vec = self.sent_layer(pack_sequence(doc_vec_list))
         output = self.softmax(self.output_layer(doc_vec))
         return output


@@ -50,25 +60,52 @@ class AttentionNet(nn.Module):
         self.gru = nn.GRU(input_size=input_size,
                           hidden_size=gru_hidden_size,
                           num_layers=gru_num_layers,
-                          batch_first=False,
+                          batch_first=True,
                           bidirectional=True)
         # Attention
         self.fc = nn.Linear(2* gru_hidden_size, context_vec_size)
         self.tanh = nn.Tanh()
-        self.softmax = nn.Softmax(dim=0)
+        self.softmax = nn.Softmax(dim=1)
         # context vector
         self.context_vec = nn.Parameter(torch.Tensor(context_vec_size, 1))
         self.context_vec.data.uniform_(-0.1, 0.1)

     def forward(self, inputs):
-        # inputs's dim (seq_len, word_dim)
-        inputs = torch.unsqueeze(inputs, 1)
+        # inputs's dim (batch_size, seq_len, word_dim)
         h_t, hidden = self.gru(inputs)
-        h_t = torch.squeeze(h_t, 1)
         u = self.tanh(self.fc(h_t))
-        alpha = self.softmax(torch.mm(u, self.context_vec))
-        output = torch.mm(h_t.t(), alpha).t()
-        # output's dim (1, 2*hidden_size)
-        return output
+        # u's dim (batch_size, seq_len, context_vec_size)
+        alpha = self.softmax(torch.matmul(u, self.context_vec))
+        # alpha's dim (batch_size, seq_len, 1)
+        output = torch.bmm(torch.transpose(h_t, 1, 2), alpha)
+        # output's dim (batch_size, 2*hidden_size, 1)
+        return torch.squeeze(output, dim=2)
+
+
+if __name__ == '__main__':
+    import numpy as np
+    use_cuda = True
+    net = HAN(input_size=200, output_size=5,
+              word_hidden_size=50, word_num_layers=1, word_context_size=100,
+              sent_hidden_size=50, sent_num_layers=1, sent_context_size=100)
+    optimizer = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
+    criterion = nn.NLLLoss()
+    test_time = 10
+    batch_size = 64
+    if use_cuda:
+        net.cuda()
+    print('test training')
+    for step in range(test_time):
+        x_data = [torch.randn(np.random.randint(1,10), 200, 200) for i in range(batch_size)]
+        y_data = torch.LongTensor([np.random.randint(0, 5) for i in range(batch_size)])
+        if use_cuda:
+            x_data = [x_i.cuda() for x_i in x_data]
+            y_data = y_data.cuda()
+        x = [Variable(x_i) for x_i in x_data]
+        y = Variable(y_data)
+        predict = net(x)
+        loss = criterion(predict, y)
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+        print(loss.data[0])
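
A minimal usage sketch of the new pack_sequence helper, for readers following the change (tensor sizes below are invented, and it assumes model_inplement/code is on the import path so pack_sequence can be imported from model.py): it pads a list of variable-length matrices into one zero-padded batch tensor.

    # usage sketch with made-up sizes; assumes pack_sequence is importable from model.py
    import torch
    from torch.autograd import Variable
    from model import pack_sequence

    sents = [Variable(torch.randn(n, 200)) for n in (3, 5, 2)]  # three (seq_len_i, word_dim) matrices
    batch = pack_sequence(sents, padding_value=0.0)             # pad to the longest sequence
    print(batch.size())  # torch.Size([3, 5, 200]) -- (batch, max_len, word_dim), short rows zero-padded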

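Likewise, a standalone shape walkthrough of the now-batched attention, mirroring the operations in AttentionNet.forward above with invented sizes (the module itself is not instantiated here; this is only a sketch of the tensor shapes):

    # shape sketch: batch=3, seq_len=7, gru_hidden=50, context_vec_size=100
    import torch
    import torch.nn as nn

    h_t = torch.randn(3, 7, 2 * 50)                            # bidirectional GRU output, batch_first=True
    fc = nn.Linear(2 * 50, 100)
    context_vec = torch.Tensor(100, 1).uniform_(-0.1, 0.1)

    u = torch.tanh(fc(h_t))                                    # (3, 7, 100)
    alpha = nn.Softmax(dim=1)(torch.matmul(u, context_vec))    # (3, 7, 1), attention over the seq_len axis
    output = torch.bmm(torch.transpose(h_t, 1, 2), alpha)      # (3, 100, 1) == (batch, 2*hidden, 1)
    print(torch.squeeze(output, dim=2).size())                 # torch.Size([3, 100])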
model_inplement/code/train.py  (+1 -1)

@@ -136,7 +136,7 @@ def train(net, dataset, num_epoch, batch_size, print_size=10, use_cuda=False):
                 if use_cuda:
                     sent_vec = sent_vec.cuda()
                 doc.append(Variable(sent_vec))
-            doc_list.append(doc)
+            doc_list.append(pack_sequence(doc))
             if use_cuda:
                 y = y.cuda()
             y = Variable(y)

