From 8f60a4fa018107d3b54c0b233e313d0c7bbaad91 Mon Sep 17 00:00:00 2001
From: yunfan
Date: Tue, 18 Sep 2018 15:57:44 +0800
Subject: [PATCH] update MLP

---
 docs/source/user/quickstart.rst |  2 +-
 fastNLP/modules/decoder/MLP.py  | 24 ++++++++-----------
 .../main.py                     |  2 +-
 3 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/docs/source/user/quickstart.rst b/docs/source/user/quickstart.rst
index c8340053..21f0855f 100644
--- a/docs/source/user/quickstart.rst
+++ b/docs/source/user/quickstart.rst
@@ -36,7 +36,7 @@ pre-processing data, constructing model and training model.
             self.enc = encoder.Conv(
                 in_channels=300, out_channels=100, kernel_size=3)
             self.agg = aggregation.MaxPool()
-            self.dec = decoder.MLP(100, num_classes=num_classes)
+            self.dec = decoder.MLP([100, num_classes])
 
         def forward(self, x):
             x = self.emb(x)  # [N,L] -> [N,L,C]
diff --git a/fastNLP/modules/decoder/MLP.py b/fastNLP/modules/decoder/MLP.py
index b8fb95f0..2a4193b1 100644
--- a/fastNLP/modules/decoder/MLP.py
+++ b/fastNLP/modules/decoder/MLP.py
@@ -2,13 +2,15 @@ import torch
 import torch.nn as nn
 from fastNLP.modules.utils import initial_parameter
 class MLP(nn.Module):
-    def __init__(self, size_layer, num_class=2, activation='relu' , initial_method = None):
+    def __init__(self, size_layer, activation='relu' , initial_method = None):
         """Multilayer Perceptrons as a decoder
 
-        Args:
-            size_layer: list of int, define the size of MLP layers
-            num_class: int, num of class in output, should be 2 or the last layer's size
-            activation: str or function, the activation function for hidden layers
+        :param size_layer: list of int, define the size of MLP layers
+        :param activation: str or function, the activation function for hidden layers
+
+        .. note::
+            There is no activation function applied to the output layer.
+
         """
         super(MLP, self).__init__()
         self.hiddens = nn.ModuleList()
@@ -19,13 +21,6 @@ class MLP(nn.Module):
             else:
                 self.hiddens.append(nn.Linear(size_layer[i-1], size_layer[i]))
-        if num_class == 2:
-            self.out_active = nn.LogSigmoid()
-        elif num_class == size_layer[-1]:
-            self.out_active = nn.LogSoftmax(dim=1)
-        else:
-            raise ValueError("should set output num_class correctly: {}".format(num_class))
-
         actives = {
             'relu': nn.ReLU(),
             'tanh': nn.Tanh()
         }
@@ -37,17 +32,18 @@ class MLP(nn.Module):
         else:
             raise ValueError("should set activation correctly: {}".format(activation))
         initial_parameter(self, initial_method )
+
     def forward(self, x):
         for layer in self.hiddens:
            x = self.hidden_active(layer(x))
-        x = self.out_active(self.output(x))
+        x = self.output(x)
         return x
 
 
 if __name__ == '__main__':
 
     net1 = MLP([5,10,5])
-    net2 = MLP([5,10,5], 5)
+    net2 = MLP([5,10,5], 'tanh')
     for net in [net1, net2]:
         x = torch.randn(5, 5)
         y = net(x)
diff --git a/reproduction/LSTM+self_attention_sentiment_analysis/main.py b/reproduction/LSTM+self_attention_sentiment_analysis/main.py
index 115d9a23..3b11f6be 100644
--- a/reproduction/LSTM+self_attention_sentiment_analysis/main.py
+++ b/reproduction/LSTM+self_attention_sentiment_analysis/main.py
@@ -53,7 +53,7 @@ class SELF_ATTENTION_YELP_CLASSIFICATION(BaseModel):
         self.embedding = Embedding(len(word2index) ,embeding_size , init_emb= None )
         self.lstm = Lstm(input_size = embeding_size,hidden_size = lstm_hidden_size ,bidirectional = True)
         self.attention = SelfAttention(lstm_hidden_size * 2 ,dim =attention_unit ,num_vec=attention_hops)
-        self.mlp = MLP(size_layer=[lstm_hidden_size * 2*attention_hops ,nfc ,class_num ] ,num_class=class_num ,)
+        self.mlp = MLP(size_layer=[lstm_hidden_size * 2*attention_hops ,nfc ,class_num ])
     def forward(self,x):
         x_emb = self.embedding(x)
         output = self.lstm(x_emb)