import torch
import torch.nn as nn


class HAN(nn.Module):
    """Hierarchical Attention Network: a word-level attention encoder turns a
    sentence (a sequence of word vectors) into a sentence vector, and a
    sentence-level attention encoder turns a document (a sequence of sentence
    vectors) into a document vector that a linear layer then classifies."""

    def __init__(self, input_size, output_size,
                 word_hidden_size, word_num_layers, word_context_size,
                 sent_hidden_size, sent_num_layers, sent_context_size):
        super(HAN, self).__init__()

        # Word-level encoder: word vectors -> sentence vector
        self.word_layer = AttentionNet(input_size,
                                       word_hidden_size,
                                       word_num_layers,
                                       word_context_size)
        # Sentence-level encoder: sentence vectors -> document vector.
        # Each sentence vector has size 2*word_hidden_size because the
        # word-level GRU is bidirectional.
        self.sent_layer = AttentionNet(2 * word_hidden_size,
                                       sent_hidden_size,
                                       sent_num_layers,
                                       sent_context_size)
        # Document vector -> class distribution
        self.output_layer = nn.Linear(2 * sent_hidden_size, output_size)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x, level='w'):
        # x is a sequence of vectors:
        #   level == 'w': a sequence of word vectors (one sentence);
        #                 returns the sentence vector
        #   level == 's': a sequence of sentence vectors (one document);
        #                 returns the class distribution
        if level == 's':
            v = self.sent_layer(x)
            output = self.softmax(self.output_layer(v))
            return output
        elif level == 'w':
            s = self.word_layer(x)
            return s
        else:
            raise ValueError("unknown level in parameter: expected 'w' or 's'")


class AttentionNet(nn.Module):
    """Bidirectional GRU encoder followed by attention pooling against a
    learned context vector."""

    def __init__(self, input_size, gru_hidden_size, gru_num_layers, context_vec_size):
        super(AttentionNet, self).__init__()

        self.input_size = input_size
        self.gru_hidden_size = gru_hidden_size
        self.gru_num_layers = gru_num_layers
        self.context_vec_size = context_vec_size

        # Encoder: bidirectional GRU, so each position yields 2*gru_hidden_size features
        self.gru = nn.GRU(input_size=input_size,
                          hidden_size=gru_hidden_size,
                          num_layers=gru_num_layers,
                          batch_first=False,
                          bidirectional=True)
        # Attention: project the hidden states and score them against the context vector
        self.fc = nn.Linear(2 * gru_hidden_size, context_vec_size)
        self.tanh = nn.Tanh()
        self.softmax = nn.Softmax(dim=0)  # normalize attention weights over the sequence
        # Learned context vector
        self.context_vec = nn.Parameter(torch.Tensor(context_vec_size, 1))
        self.context_vec.data.uniform_(-0.1, 0.1)

    def forward(self, inputs):
        # inputs: seq_len x input_size (a single, unbatched sequence)
        inputs = torch.unsqueeze(inputs, 1)                   # seq_len x 1 x input_size
        h_t, hidden = self.gru(inputs)                        # seq_len x 1 x (2*gru_hidden_size)
        h_t = torch.squeeze(h_t, 1)                           # seq_len x (2*gru_hidden_size)
        u = self.tanh(self.fc(h_t))                           # seq_len x context_vec_size
        alpha = self.softmax(torch.mm(u, self.context_vec))   # seq_len x 1 attention weights
        output = torch.mm(alpha.t(), h_t)                     # 1 x (2*gru_hidden_size) weighted sum
        return output
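# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the original file): the layer
# sizes are arbitrary assumptions and the word vectors are random stand-ins
# for an embedding lookup. It shows the intended two-level call pattern:
# encode each sentence at the word level, stack the sentence vectors, then
# classify the document at the sentence level.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    model = HAN(input_size=200, output_size=5,
                word_hidden_size=50, word_num_layers=1, word_context_size=100,
                sent_hidden_size=50, sent_num_layers=1, sent_context_size=100)

    # A toy "document": two sentences of 12 and 8 word vectors each.
    doc = [torch.randn(12, 200), torch.randn(8, 200)]

    # Word level: each sentence -> a 1 x (2*word_hidden_size) sentence vector.
    sent_vecs = [model(sent, level='w') for sent in doc]

    # Sentence level: stack the sentence vectors and classify the document.
    doc_matrix = torch.cat(sent_vecs, dim=0)      # num_sents x (2*word_hidden_size)
    probs = model(doc_matrix, level='s')          # 1 x output_size class distribution
    print(probs)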