diff --git a/fastNLP/core/__init__.py b/fastNLP/core/__init__.py
index 1003c824..dfe35f77 100644
--- a/fastNLP/core/__init__.py
+++ b/fastNLP/core/__init__.py
@@ -2,10 +2,10 @@ from .batch import Batch
 from .dataset import DataSet
 from .fieldarray import FieldArray
 from .instance import Instance
+from .losses import Loss
 from .metrics import Evaluator, ClassifyEvaluator, SNLIEvaluator, SeqLabelEvaluator
+from .optimizer import Optimizer
 from .sampler import SequentialSampler, BucketSampler, RandomSampler, BaseSampler
 from .tester import Tester
 from .trainer import Trainer
 from .vocabulary import Vocabulary
-from .optimizer import Optimizer
-from .loss import Loss
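The `__init__.py` hunk reorders the package exports alphabetically and points `Loss` at the new `losses` module, so code importing from `fastNLP.core` keeps working unchanged. A minimal sanity check (a sketch, assuming this revision is installed):

```python
# Both names still resolve at package level; only their source modules moved.
from fastNLP.core import Loss, Optimizer

loss_func = Loss("cross_entropy")  # now backed by fastNLP/core/losses.py
```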
diff --git a/fastNLP/core/loss.py b/fastNLP/core/loss.py
deleted file mode 100644
index 093b3b96..00000000
--- a/fastNLP/core/loss.py
+++ /dev/null
@@ -1,196 +0,0 @@
-import torch
-
-def squash(predict , truth , **kwargs):
-    '''To reshape tensors in order to fit Loss functions in pytorch
-
-    :param predict : Tensor, model output
-    :param truth : Tensor, truth from dataset
-    :param **kwargs : extra arguments
-
-    :return predict , truth: predict & truth after processing
-    '''
-    return predict.view(-1 , predict.size()[-1]) , truth.view(-1,)
-
-def unpad(predict , truth , **kwargs):
-    '''To process padded sequence output to get true loss
-    Using pack_padded_sequence() method
-    This method contains squash()
-
-    :param predict : Tensor, [batch_size , max_len , tag_size]
-    :param truth : Tensor, [batch_size , max_len]
-    :param **kwargs : extra arguments, kwargs["lens"] is expected to be exsist
-        kwargs["lens"] : list or LongTensor, [batch_size]
-        the i-th element is true lengths of i-th sequence
-
-    :return predict , truth: predict & truth after processing
-    '''
-    if kwargs.get("lens") is None:
-        return predict , truth
-    lens = torch.LongTensor(kwargs["lens"])
-    lens , idx = torch.sort(lens , descending = True)
-    predict = torch.nn.utils.rnn.pack_padded_sequence(predict[idx] , lens , batch_first = True).data
-    truth = torch.nn.utils.rnn.pack_padded_sequence(truth[idx] , lens , batch_first = True).data
-    return predict , truth
-
-def unpad_mask(predict , truth , **kwargs):
-    '''To process padded sequence output to get true loss
-    Using mask() method
-    This method contains squash()
-
-    :param predict : Tensor, [batch_size , max_len , tag_size]
-    :param truth : Tensor, [batch_size , max_len]
-    :param **kwargs : extra arguments, kwargs["lens"] is expected to be exsist
-        kwargs["lens"] : list or LongTensor, [batch_size]
-        the i-th element is true lengths of i-th sequence
-
-    :return predict , truth: predict & truth after processing
-    '''
-    if kwargs.get("lens") is None:
-        return predict , truth
-    mas = make_mask(kwargs["lens"] , truth.size()[1])
-    return mask(predict , truth , mask = mas)
-
-def mask(predict , truth , **kwargs):
-    '''To select specific elements from Tensor
-    This method contains squash()
-
-    :param predict : Tensor, [batch_size , max_len , tag_size]
-    :param truth : Tensor, [batch_size , max_len]
-    :param **kwargs : extra arguments, kwargs["mask"] is expected to be exsist
-        kwargs["mask"] : ByteTensor, [batch_size , max_len]
-        the mask Tensor , the position that is 1 will be selected
-
-    :return predict , truth: predict & truth after processing
-    '''
-    if kwargs.get("mask") is None:
-        return predict , truth
-    mask = kwargs["mask"]
-
-    predict , truth = squash(predict , truth)
-    mask = mask.view(-1,)
-
-    predict = torch.masked_select(predict.permute(1,0) , mask).view(predict.size()[-1] , -1).permute(1,0)
-    truth = torch.masked_select(truth , mask)
-
-    return predict , truth
-
-def make_mask(lens , tar_len):
-    '''to generate a mask that select [:lens[i]] for i-th element
-    embezzle from fastNLP.models.sequence_modeling.seq_mask
-
-    :param lens : list or LongTensor, [batch_size]
-    :param tar_len : int
-
-    :return mask : ByteTensor
-    '''
-    lens = torch.LongTensor(lens)
-    mask = [torch.ge(lens, i + 1) for i in range(tar_len)]
-    mask = torch.stack(mask, 1)
-    return mask
-
-#map string to function. Just for more elegant using
-method_dict = {
-    "squash" : squash,
-    "unpad" : unpad,
-    "unpad_mask" : unpad_mask,
-    "mask" : mask,
-}
-
-loss_function_name = {
-    "L1Loss".lower() : torch.nn.L1Loss,
-    "BCELoss".lower() : torch.nn.BCELoss,
-    "MSELoss".lower() : torch.nn.MSELoss,
-    "NLLLoss".lower() : torch.nn.NLLLoss,
-    "KLDivLoss".lower() : torch.nn.KLDivLoss,
-    "NLLLoss2dLoss".lower() : torch.nn.NLLLoss2d, #every name should end with "loss"
-    "SmoothL1Loss".lower() : torch.nn.SmoothL1Loss,
-    "SoftMarginLoss".lower() : torch.nn.SoftMarginLoss,
-    "PoissonNLLLoss".lower() : torch.nn.PoissonNLLLoss,
-    "MultiMarginLoss".lower() : torch.nn.MultiMarginLoss,
-    "CrossEntropyLoss".lower() : torch.nn.CrossEntropyLoss,
-    "BCEWithLogitsLoss".lower() : torch.nn.BCEWithLogitsLoss,
-    "MarginRankingLoss".lower() : torch.nn.MarginRankingLoss,
-    "TripletMarginLoss".lower() : torch.nn.TripletMarginLoss,
-    "HingeEmbeddingLoss".lower() : torch.nn.HingeEmbeddingLoss,
-    "CosineEmbeddingLoss".lower() : torch.nn.CosineEmbeddingLoss,
-    "MultiLabelMarginLoss".lower() : torch.nn.MultiLabelMarginLoss,
-    "MultiLabelSoftMarginLoss".lower() : torch.nn.MultiLabelSoftMarginLoss,
-}
-
-class Loss(object):
-    '''a Loss object is a callable object represents loss functions
-    '''
-
-    def __init__(self , loss_name , pre_pro = [squash], **kwargs):
-        '''
-
-        :param loss_name: str or None , the name of loss function
-        :param pre_pro : list of function or str, methods to reform parameters before calculating loss
-            the strings will be auto translated to pre-defined functions
-        :param **kwargs: kwargs for torch loss function
-
-        pre_pro funcsions should have three arguments: predict, truth, **arg
-        predict and truth is the necessary parameters in loss function
-        kwargs is the extra parameters passed-in when calling loss function
-        pre_pro functions should return two objects, respectively predict and truth that after processed
-
-        '''
-
-        if loss_name is None:
-            # this is useful when Trainer.__init__ performs type check
-            self._loss = None
-        else:
-            if not isinstance(loss_name, str):
-                raise NotImplementedError
-            else:
-                self._loss = self._get_loss(loss_name , **kwargs)
-
-        self.pre_pro = [f if callable(f) else method_dict.get(f) for f in pre_pro]
-
-    def add_pre_pro(self , func):
-        '''add a pre_pro function
-
-        :param func: a function or str, methods to reform parameters before calculating loss
-            the strings will be auto translated to pre-defined functions
-        '''
-        if not callable(func):
-            func = method_dict.get(func)
-            if func is None:
-                return
-        self.pre_pro.append(func)
-
-    @staticmethod
-    def _get_loss(loss_name , **kwargs):
-        '''Get loss function from torch
-
-        :param loss_name: str, the name of loss function
-        :param **kwargs: kwargs for torch loss function
-        :return: A callable loss function object
-        '''
-        loss_name = loss_name.strip().lower()
-        loss_name = "".join(loss_name.split("_"))
-
-        if len(loss_name) < 4 or loss_name[-4 : ] != "loss":
-            loss_name += "loss"
-        return loss_function_name[loss_name](**kwargs)
-
-    def get(self):
-        '''This method exists just for make some existing codes run error-freely
-        '''
-        return self
-
-    def __call__(self , predict , truth , **kwargs):
-        '''call a loss function
-        predict and truth will be processed by pre_pro methods in order of addition
-
-        :param predict : Tensor, model output
-        :param truth : Tensor, truth from dataset
-        :param **kwargs : extra arguments, pass to pre_pro functions
-            for example, if used unpad_mask() in pre_pro, there should be a kwarg named lens
-        '''
-        for f in self.pre_pro:
-            if f is None:
-                continue
-            predict , truth = f(predict , truth , **kwargs)
-
-        return self._loss(predict , truth)
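The deleted helpers above reappear verbatim (reformatted to PEP 8) in `losses.py` below. The piece whose behavior is easiest to misread is `make_mask`, which builds the mask column by column with `torch.ge`. A small worked example of its semantics (a sketch derived from the code, not a new API):

```python
import torch

# make_mask(lens=[2, 3], tar_len=3): row i keeps positions [:lens[i]].
lens = torch.LongTensor([2, 3])
mask = torch.stack([torch.ge(lens, i + 1) for i in range(3)], 1)
print(mask)  # [[1, 1, 0], [1, 1, 1]] -- one row per sequence, 1 = keep
```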
diff --git a/fastNLP/core/losses.py b/fastNLP/core/losses.py
new file mode 100644
index 00000000..1e5a4914
--- /dev/null
+++ b/fastNLP/core/losses.py
@@ -0,0 +1,219 @@
+import torch
+
+
+class LossBase(object):
+    def __init__(self):
+        self.param_map = {}
+
+    def get_loss(self, *args, **kwargs):
+        raise NotImplementedError
+
+    def __call__(self, output_dict, predict_dict):
+        pass
+
+
+class Loss(LossBase):
+    def __init__(self):
+        pass
+
+
+def squash(predict, truth, **kwargs):
+    '''To reshape tensors in order to fit Loss functions in pytorch
+
+    :param predict : Tensor, model output
+    :param truth : Tensor, truth from dataset
+    :param **kwargs : extra arguments
+
+    :return predict , truth: predict & truth after processing
+    '''
+    return predict.view(-1, predict.size()[-1]), truth.view(-1, )
+
+
+def unpad(predict, truth, **kwargs):
+    '''To process padded sequence output to get true loss
+    Using pack_padded_sequence() method
+    This method contains squash()
+
+    :param predict : Tensor, [batch_size , max_len , tag_size]
+    :param truth : Tensor, [batch_size , max_len]
+    :param **kwargs : extra arguments, kwargs["lens"] is expected to be exsist
+        kwargs["lens"] : list or LongTensor, [batch_size]
+        the i-th element is true lengths of i-th sequence
+
+    :return predict , truth: predict & truth after processing
+    '''
+    if kwargs.get("lens") is None:
+        return predict, truth
+    lens = torch.LongTensor(kwargs["lens"])
+    lens, idx = torch.sort(lens, descending=True)
+    predict = torch.nn.utils.rnn.pack_padded_sequence(predict[idx], lens, batch_first=True).data
+    truth = torch.nn.utils.rnn.pack_padded_sequence(truth[idx], lens, batch_first=True).data
+    return predict, truth
+
+
+def unpad_mask(predict, truth, **kwargs):
+    '''To process padded sequence output to get true loss
+    Using mask() method
+    This method contains squash()
+
+    :param predict : Tensor, [batch_size , max_len , tag_size]
+    :param truth : Tensor, [batch_size , max_len]
+    :param **kwargs : extra arguments, kwargs["lens"] is expected to be exsist
+        kwargs["lens"] : list or LongTensor, [batch_size]
+        the i-th element is true lengths of i-th sequence
+
+    :return predict , truth: predict & truth after processing
+    '''
+    if kwargs.get("lens") is None:
+        return predict, truth
+    mas = make_mask(kwargs["lens"], truth.size()[1])
+    return mask(predict, truth, mask=mas)
+
+
+def mask(predict, truth, **kwargs):
+    '''To select specific elements from Tensor
+    This method contains squash()
+
+    :param predict : Tensor, [batch_size , max_len , tag_size]
+    :param truth : Tensor, [batch_size , max_len]
+    :param **kwargs : extra arguments, kwargs["mask"] is expected to be exsist
+        kwargs["mask"] : ByteTensor, [batch_size , max_len]
+        the mask Tensor , the position that is 1 will be selected
+
+    :return predict , truth: predict & truth after processing
+    '''
+    if kwargs.get("mask") is None:
+        return predict, truth
+    mask = kwargs["mask"]
+
+    predict, truth = squash(predict, truth)
+    mask = mask.view(-1, )
+
+    predict = torch.masked_select(predict.permute(1, 0), mask).view(predict.size()[-1], -1).permute(1, 0)
+    truth = torch.masked_select(truth, mask)
+
+    return predict, truth
+
+
+def make_mask(lens, tar_len):
+    '''to generate a mask that select [:lens[i]] for i-th element
+    embezzle from fastNLP.models.sequence_modeling.seq_mask
+
+    :param lens : list or LongTensor, [batch_size]
+    :param tar_len : int
+
+    :return mask : ByteTensor
+    '''
+    lens = torch.LongTensor(lens)
+    mask = [torch.ge(lens, i + 1) for i in range(tar_len)]
+    mask = torch.stack(mask, 1)
+    return mask
+
+
+# map string to function. Just for more elegant using
+method_dict = {
+    "squash": squash,
+    "unpad": unpad,
+    "unpad_mask": unpad_mask,
+    "mask": mask,
+}
+
+loss_function_name = {
+    "L1Loss".lower(): torch.nn.L1Loss,
+    "BCELoss".lower(): torch.nn.BCELoss,
+    "MSELoss".lower(): torch.nn.MSELoss,
+    "NLLLoss".lower(): torch.nn.NLLLoss,
+    "KLDivLoss".lower(): torch.nn.KLDivLoss,
+    "NLLLoss2dLoss".lower(): torch.nn.NLLLoss2d,  # every name should end with "loss"
+    "SmoothL1Loss".lower(): torch.nn.SmoothL1Loss,
+    "SoftMarginLoss".lower(): torch.nn.SoftMarginLoss,
+    "PoissonNLLLoss".lower(): torch.nn.PoissonNLLLoss,
+    "MultiMarginLoss".lower(): torch.nn.MultiMarginLoss,
+    "CrossEntropyLoss".lower(): torch.nn.CrossEntropyLoss,
+    "BCEWithLogitsLoss".lower(): torch.nn.BCEWithLogitsLoss,
+    "MarginRankingLoss".lower(): torch.nn.MarginRankingLoss,
+    "TripletMarginLoss".lower(): torch.nn.TripletMarginLoss,
+    "HingeEmbeddingLoss".lower(): torch.nn.HingeEmbeddingLoss,
+    "CosineEmbeddingLoss".lower(): torch.nn.CosineEmbeddingLoss,
+    "MultiLabelMarginLoss".lower(): torch.nn.MultiLabelMarginLoss,
+    "MultiLabelSoftMarginLoss".lower(): torch.nn.MultiLabelSoftMarginLoss,
+}
+
+
+class Loss(object):
+    '''a Loss object is a callable object represents loss functions
+    '''
+
+    def __init__(self, loss_name, pre_pro=[squash], **kwargs):
+        '''
+
+        :param loss_name: str or None , the name of loss function
+        :param pre_pro : list of function or str, methods to reform parameters before calculating loss
+            the strings will be auto translated to pre-defined functions
+        :param **kwargs: kwargs for torch loss function
+
+        pre_pro funcsions should have three arguments: predict, truth, **arg
+        predict and truth is the necessary parameters in loss function
+        kwargs is the extra parameters passed-in when calling loss function
+        pre_pro functions should return two objects, respectively predict and truth that after processed
+
+        '''
+
+        if loss_name is None:
+            # this is useful when Trainer.__init__ performs type check
+            self._loss = None
+        else:
+            if not isinstance(loss_name, str):
+                raise NotImplementedError
+            else:
+                self._loss = self._get_loss(loss_name, **kwargs)
+
+        self.pre_pro = [f if callable(f) else method_dict.get(f) for f in pre_pro]
+
+    def add_pre_pro(self, func):
+        '''add a pre_pro function
+
+        :param func: a function or str, methods to reform parameters before calculating loss
+            the strings will be auto translated to pre-defined functions
+        '''
+        if not callable(func):
+            func = method_dict.get(func)
+            if func is None:
+                return
+        self.pre_pro.append(func)
+
+    @staticmethod
+    def _get_loss(loss_name, **kwargs):
+        '''Get loss function from torch
+
+        :param loss_name: str, the name of loss function
+        :param **kwargs: kwargs for torch loss function
+        :return: A callable loss function object
+        '''
+        loss_name = loss_name.strip().lower()
+        loss_name = "".join(loss_name.split("_"))
+
+        if len(loss_name) < 4 or loss_name[-4:] != "loss":
+            loss_name += "loss"
+        return loss_function_name[loss_name](**kwargs)
+
+    def get(self):
+        '''This method exists just for make some existing codes run error-freely
+        '''
+        return self
+
+    def __call__(self, predict, truth, **kwargs):
+        '''call a loss function
+        predict and truth will be processed by pre_pro methods in order of addition
+
+        :param predict : Tensor, model output
+        :param truth : Tensor, truth from dataset
+        :param **kwargs : extra arguments, pass to pre_pro functions
+            for example, if used unpad_mask() in pre_pro, there should be a kwarg named lens
+        '''
+        for f in self.pre_pro:
+            if f is None:
+                continue
+            predict, truth = f(predict, truth, **kwargs)
+
+        return self._loss(predict, truth)
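Note that `losses.py` defines `Loss` twice: the `Loss(LossBase)` stub near the top of the file is immediately shadowed by the ported `Loss(object)` further down, so callers still get the old behavior. A sketch of the ported `pre_pro` pipeline on padded sequences (shapes and values here are illustrative, not from the patch):

```python
import torch
import torch.nn.functional as F

from fastNLP.core.losses import Loss

# _get_loss normalizes "nll" -> "nllloss" -> torch.nn.NLLLoss.
loss_func = Loss("nll", pre_pro=["unpad"])

log_probs = F.log_softmax(torch.randn(2, 6, 5), dim=-1)  # [batch, max_len, tag_size]
target = torch.randint(0, 5, (2, 6))                     # [batch, max_len]

# Extra kwargs are forwarded to the pre_pro functions; unpad() reads "lens"
# and packs both tensors so padded positions never reach the loss.
print(loss_func(log_probs, target, lens=[6, 4]))
```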
diff --git a/fastNLP/core/trainer.py b/fastNLP/core/trainer.py
index 6b0398b5..26362cb9 100644
--- a/fastNLP/core/trainer.py
+++ b/fastNLP/core/trainer.py
@@ -1,27 +1,25 @@
+import itertools
+import os
 import time
-from datetime import timedelta
-from datetime import datetime
 import warnings
 from collections import defaultdict
-import os
-import itertools
-import shutil
+from datetime import datetime
+from datetime import timedelta
 
-from tensorboardX import SummaryWriter
 import torch
+from tensorboardX import SummaryWriter
 
 from fastNLP.core.batch import Batch
-from fastNLP.core.loss import Loss
-from fastNLP.core.metrics import Evaluator
 from fastNLP.core.optimizer import Optimizer
 from fastNLP.core.sampler import RandomSampler
 from fastNLP.core.sampler import SequentialSampler
 from fastNLP.core.tester import Tester
-from fastNLP.core.utils import _check_arg_dict_list
 from fastNLP.core.utils import _build_args
+from fastNLP.core.utils import _check_arg_dict_list
 from fastNLP.core.utils import _syn_model_data
 from fastNLP.core.utils import get_func_signature
 
+
 class Trainer(object):
     """Main Training Loop
diff --git a/test/core/test_loss.py b/test/core/test_loss.py
index d45d54e3..fdde4f0e 100644
--- a/test/core/test_loss.py
+++ b/test/core/test_loss.py
@@ -1,9 +1,10 @@
+import math
 import unittest
-import fastNLP.core.loss as loss
-import math
 import torch as tc
-import pdb
+
+import fastNLP.core.losses as loss
+
 
 class TestLoss(unittest.TestCase):
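The test module only swaps its import to the renamed `losses` module. A sketch of how a case against the new path might look (the class and tensors below are hypothetical, not part of the patch):

```python
import unittest

import torch as tc

import fastNLP.core.losses as loss  # was fastNLP.core.loss before this patch


class TestLossSketch(unittest.TestCase):
    def test_l1(self):
        # pre_pro=[] skips the default squash(), which would otherwise flatten
        # `truth` to 1-D and misalign an element-wise loss such as L1.
        loss_func = loss.Loss("l1", pre_pro=[])
        a = tc.randn(5, 3)
        b = tc.randn(5, 3)
        expected = tc.nn.functional.l1_loss(a, b)
        self.assertAlmostEqual(float(loss_func(a, b)), float(expected), places=5)
```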