
Merge pull request #7 from fastnlp/master

update
tags/v0.2.0
lyhuang18 GitHub 6 years ago
commit 40a08def5f
3 changed files with 12 additions and 16 deletions
  1. docs/source/user/quickstart.rst (+1 / -1)
  2. fastNLP/modules/decoder/MLP.py (+10 / -14)
  3. reproduction/LSTM+self_attention_sentiment_analysis/main.py (+1 / -1)

docs/source/user/quickstart.rst (+1 / -1)

@@ -36,7 +36,7 @@ pre-processing data, constructing model and training model.
         self.enc = encoder.Conv(
             in_channels=300, out_channels=100, kernel_size=3)
         self.agg = aggregation.MaxPool()
-        self.dec = decoder.MLP(100, num_classes=num_classes)
+        self.dec = decoder.MLP([100, num_classes])

     def forward(self, x):
         x = self.emb(x)  # [N,L] -> [N,L,C]
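With the list-based constructor, the quickstart's decoder is now defined by the layer sizes alone. A minimal sketch of what the updated call appears to build, assuming consecutive entries of size_layer become nn.Linear layers as the MLP.py hunks below suggest (num_classes here is a stand-in value, not taken from the quickstart):

    import torch
    import torch.nn as nn

    # Hypothetical stand-in for the quickstart's num_classes variable.
    num_classes = 5

    # Rough equivalent of decoder.MLP([100, num_classes]) after this commit:
    # a single linear projection from the 100-dim pooled features to the classes,
    # with no activation applied on the output layer.
    dec = nn.Linear(100, num_classes)
    scores = dec(torch.randn(2, 100))  # raw scores, shape [2, num_classes]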


fastNLP/modules/decoder/MLP.py (+10 / -14)

@@ -2,13 +2,15 @@ import torch
 import torch.nn as nn
 from fastNLP.modules.utils import initial_parameter


 class MLP(nn.Module):
-    def __init__(self, size_layer, num_class=2, activation='relu' , initial_method = None):
+    def __init__(self, size_layer, activation='relu' , initial_method = None):
         """Multilayer Perceptrons as a decoder

-        Args:
-            size_layer: list of int, define the size of MLP layers
-            num_class: int, num of class in output, should be 2 or the last layer's size
-            activation: str or function, the activation function for hidden layers
+        :param size_layer: list of int, define the size of MLP layers
+        :param activation: str or function, the activation function for hidden layers
+
+        .. note::
+            There is no activation function applying on output layer.
+
         """
         super(MLP, self).__init__()
         self.hiddens = nn.ModuleList()
@@ -19,13 +21,6 @@ class MLP(nn.Module):
             else:
                 self.hiddens.append(nn.Linear(size_layer[i-1], size_layer[i]))

-        if num_class == 2:
-            self.out_active = nn.LogSigmoid()
-        elif num_class == size_layer[-1]:
-            self.out_active = nn.LogSoftmax(dim=1)
-        else:
-            raise ValueError("should set output num_class correctly: {}".format(num_class))
         actives = {
             'relu': nn.ReLU(),
             'tanh': nn.Tanh()
@@ -37,17 +32,18 @@ class MLP(nn.Module):
         else:
             raise ValueError("should set activation correctly: {}".format(activation))
         initial_parameter(self, initial_method )

     def forward(self, x):
         for layer in self.hiddens:
             x = self.hidden_active(layer(x))
-        x = self.out_active(self.output(x))
+        x = self.output(x)
         return x


 if __name__ == '__main__':
     net1 = MLP([5,10,5])
-    net2 = MLP([5,10,5], 5)
+    net2 = MLP([5,10,5], 'tanh')
     for net in [net1, net2]:
         x = torch.randn(5, 5)
         y = net(x)
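Taken together, these hunks change the decoder's contract: the constructor takes just the size list plus an optional activation, and forward returns the raw output of the last linear layer. A hedged usage sketch under that reading; the import path mirrors this file's location in the repository, and all sizes and tensor shapes are illustrative:

    import torch
    import torch.nn.functional as F
    from fastNLP.modules.decoder.MLP import MLP  # module path as of this commit

    net = MLP([256, 128, 10], activation='tanh')  # 256 -> 128 -> 10, tanh on the hidden layer only
    x = torch.randn(4, 256)                       # batch of 4 illustrative feature vectors
    logits = net(x)                               # raw scores; no LogSigmoid/LogSoftmax is applied any more
    log_probs = F.log_softmax(logits, dim=1)      # normalize yourself if log-probabilities are needed

Callers that previously relied on num_class to pick LogSigmoid or LogSoftmax now have to do that step, or use a loss that includes it, outside the module.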


reproduction/LSTM+self_attention_sentiment_analysis/main.py (+1 / -1)

@@ -53,7 +53,7 @@ class SELF_ATTENTION_YELP_CLASSIFICATION(BaseModel):
         self.embedding = Embedding(len(word2index) ,embeding_size , init_emb= None )
         self.lstm = Lstm(input_size = embeding_size,hidden_size = lstm_hidden_size ,bidirectional = True)
         self.attention = SelfAttention(lstm_hidden_size * 2 ,dim =attention_unit ,num_vec=attention_hops)
-        self.mlp = MLP(size_layer=[lstm_hidden_size * 2*attention_hops ,nfc ,class_num ] ,num_class=class_num ,)
+        self.mlp = MLP(size_layer=[lstm_hidden_size * 2*attention_hops ,nfc ,class_num ])
     def forward(self,x):
         x_emb = self.embedding(x)
         output = self.lstm(x_emb)
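Because the decoder no longer log-normalizes its output, a call site like this one is expected to pair the MLP's raw scores with a loss that handles the normalization itself. A minimal, hypothetical pairing (the loss choice is not part of this commit):

    import torch
    import torch.nn as nn

    criterion = nn.CrossEntropyLoss()      # applies log-softmax internally, so raw logits are fine
    logits = torch.randn(8, 5)             # stand-in for the model's MLP output: batch of 8, 5 classes
    targets = torch.randint(0, 5, (8,))    # stand-in gold labels
    loss = criterion(logits, targets)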

