diff --git a/fastNLP/core/callback.py b/fastNLP/core/callback.py
index b1a480cc..d941c235 100644
--- a/fastNLP/core/callback.py
+++ b/fastNLP/core/callback.py
@@ -248,7 +248,10 @@ class GradientClipCallback(Callback):
         self.clip_value = clip_value
 
     def on_backward_end(self, model):
-        self.clip_fun(model.parameters(), self.clip_value)
+        if self.parameters is None:
+            self.clip_fun(model.parameters(), self.clip_value)
+        else:
+            self.clip_fun(self.parameters, self.clip_value)
 
 
 class CallbackException(BaseException):
@@ -306,7 +309,6 @@ class LRScheduler(Callback):
 
     def on_epoch_begin(self, cur_epoch, total_epoch):
         self.scheduler.step()
-        print("scheduler step ", "lr=", self.trainer.optimizer.param_groups[0]["lr"])
 
 
 class ControlC(Callback):
diff --git a/fastNLP/modules/decoder/MLP.py b/fastNLP/modules/decoder/MLP.py
index c9198859..b76fdab7 100644
--- a/fastNLP/modules/decoder/MLP.py
+++ b/fastNLP/modules/decoder/MLP.py
@@ -7,7 +7,7 @@ from fastNLP.modules.utils import initial_parameter
 
 class MLP(nn.Module):
     """Multilayer Perceptrons as a decoder
-    :param list size_layer: list of int, define the size of MLP layers.
+    :param list size_layer: list of int, define the size of MLP layers. The number of layers is (len(size_layer)-1)//2 + 1.
     :param str activation: str or function, the activation function for hidden layers.
     :param str initial_method: the name of initialization method.
     :param float dropout: the probability of dropout.
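
A minimal standalone sketch of the new dispatch in on_backward_end, not part of the patch: clip the explicitly supplied parameters when they are given, otherwise fall back to all model parameters. The ClipSketch class and the demo setup are hypothetical; torch.nn.utils.clip_grad_value_ stands in for whatever self.clip_fun was configured to.

import torch
import torch.nn as nn
from torch.nn.utils import clip_grad_value_


class ClipSketch:
    # Hypothetical stand-in for the patched GradientClipCallback behaviour.
    def __init__(self, parameters=None, clip_value=1.0):
        # Keep an explicit parameter list if one was passed, else None.
        self.parameters = list(parameters) if parameters is not None else None
        self.clip_value = clip_value
        self.clip_fun = clip_grad_value_

    def on_backward_end(self, model):
        # Same branch as the diff: prefer the user-supplied parameters.
        if self.parameters is None:
            self.clip_fun(model.parameters(), self.clip_value)
        else:
            self.clip_fun(self.parameters, self.clip_value)


model = nn.Linear(4, 2)
model(torch.randn(3, 4)).sum().backward()
# Clip only the weight's gradients; the bias gradient is left untouched.
ClipSketch(parameters=[model.weight], clip_value=0.5).on_backward_end(model)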