@@ -2,10 +2,10 @@ from .batch import Batch
 from .dataset import DataSet
 from .fieldarray import FieldArray
 from .instance import Instance
+from .losses import Loss
 from .metrics import Evaluator, ClassifyEvaluator, SNLIEvaluator, SeqLabelEvaluator
+from .optimizer import Optimizer
 from .sampler import SequentialSampler, BucketSampler, RandomSampler, BaseSampler
 from .tester import Tester
 from .trainer import Trainer
 from .vocabulary import Vocabulary
-from .optimizer import Optimizer
-from .loss import Loss
@@ -1,196 +0,0 @@
-import torch
-
-
-def squash(predict , truth , **kwargs):
-    '''Reshape the tensors so that they fit the loss functions in PyTorch.
-
-    :param predict : Tensor, model output
-    :param truth : Tensor, gold labels from the dataset
-    :param **kwargs : extra arguments (unused)
-
-    :return predict , truth: predict & truth after processing
-    '''
-    return predict.view(-1 , predict.size()[-1]) , truth.view(-1,)
-
-
-def unpad(predict , truth , **kwargs):
-    '''Process padded sequence output to compute the true loss,
-    using the pack_padded_sequence() method.
-    The reshaping done by squash() is performed implicitly.
-
-    :param predict : Tensor, [batch_size , max_len , tag_size]
-    :param truth : Tensor, [batch_size , max_len]
-    :param **kwargs : extra arguments; kwargs["lens"] is expected to exist
-        kwargs["lens"] : list or LongTensor, [batch_size]
-            the i-th element is the true length of the i-th sequence
-
-    :return predict , truth: predict & truth after processing
-    '''
-    if kwargs.get("lens") is None:
-        return predict , truth
-    lens = torch.LongTensor(kwargs["lens"])
-    lens , idx = torch.sort(lens , descending = True)
-    predict = torch.nn.utils.rnn.pack_padded_sequence(predict[idx] , lens , batch_first = True).data
-    truth = torch.nn.utils.rnn.pack_padded_sequence(truth[idx] , lens , batch_first = True).data
-    return predict , truth
-
-
-def unpad_mask(predict , truth , **kwargs):
-    '''Process padded sequence output to compute the true loss,
-    using the mask() method. squash() is applied internally.
-
-    :param predict : Tensor, [batch_size , max_len , tag_size]
-    :param truth : Tensor, [batch_size , max_len]
-    :param **kwargs : extra arguments; kwargs["lens"] is expected to exist
-        kwargs["lens"] : list or LongTensor, [batch_size]
-            the i-th element is the true length of the i-th sequence
-
-    :return predict , truth: predict & truth after processing
-    '''
-    if kwargs.get("lens") is None:
-        return predict , truth
-    mas = make_mask(kwargs["lens"] , truth.size()[1])
-    return mask(predict , truth , mask = mas)
-
-
-def mask(predict , truth , **kwargs):
-    '''Select specific elements from a Tensor. This method calls squash().
-
-    :param predict : Tensor, [batch_size , max_len , tag_size]
-    :param truth : Tensor, [batch_size , max_len]
-    :param **kwargs : extra arguments; kwargs["mask"] is expected to exist
-        kwargs["mask"] : ByteTensor, [batch_size , max_len]
-            the mask Tensor; positions marked with 1 are selected
-
-    :return predict , truth: predict & truth after processing
-    '''
-    if kwargs.get("mask") is None:
-        return predict , truth
-    mask = kwargs["mask"]
-    predict , truth = squash(predict , truth)
-    mask = mask.view(-1,)
-    predict = torch.masked_select(predict.permute(1,0) , mask).view(predict.size()[-1] , -1).permute(1,0)
-    truth = torch.masked_select(truth , mask)
-    return predict , truth
-
-
-def make_mask(lens , tar_len):
-    '''Generate a mask that selects [:lens[i]] for the i-th element,
-    borrowed from fastNLP.models.sequence_modeling.seq_mask.
-
-    :param lens : list or LongTensor, [batch_size]
-    :param tar_len : int
-
-    :return mask : ByteTensor
-    '''
-    lens = torch.LongTensor(lens)
-    mask = [torch.ge(lens, i + 1) for i in range(tar_len)]
-    mask = torch.stack(mask, 1)
-    return mask
-
-
-# Map strings to functions, just for more convenient use.
-method_dict = {
-    "squash" : squash,
-    "unpad" : unpad,
-    "unpad_mask" : unpad_mask,
-    "mask" : mask,
-}
-
-loss_function_name = {
-    "L1Loss".lower() : torch.nn.L1Loss,
-    "BCELoss".lower() : torch.nn.BCELoss,
-    "MSELoss".lower() : torch.nn.MSELoss,
-    "NLLLoss".lower() : torch.nn.NLLLoss,
-    "KLDivLoss".lower() : torch.nn.KLDivLoss,
-    "NLLLoss2dLoss".lower() : torch.nn.NLLLoss2d,  # every key must end with "loss" (see _get_loss)
-    "SmoothL1Loss".lower() : torch.nn.SmoothL1Loss,
-    "SoftMarginLoss".lower() : torch.nn.SoftMarginLoss,
-    "PoissonNLLLoss".lower() : torch.nn.PoissonNLLLoss,
-    "MultiMarginLoss".lower() : torch.nn.MultiMarginLoss,
-    "CrossEntropyLoss".lower() : torch.nn.CrossEntropyLoss,
-    "BCEWithLogitsLoss".lower() : torch.nn.BCEWithLogitsLoss,
-    "MarginRankingLoss".lower() : torch.nn.MarginRankingLoss,
-    "TripletMarginLoss".lower() : torch.nn.TripletMarginLoss,
-    "HingeEmbeddingLoss".lower() : torch.nn.HingeEmbeddingLoss,
-    "CosineEmbeddingLoss".lower() : torch.nn.CosineEmbeddingLoss,
-    "MultiLabelMarginLoss".lower() : torch.nn.MultiLabelMarginLoss,
-    "MultiLabelSoftMarginLoss".lower() : torch.nn.MultiLabelSoftMarginLoss,
-}
-
-
-class Loss(object):
-    '''A Loss object is a callable object that wraps a loss function.
-    '''
-
-    def __init__(self , loss_name , pre_pro = [squash], **kwargs):
-        '''
-        :param loss_name: str or None, the name of the loss function
-        :param pre_pro : list of functions or str, methods that reshape the parameters before the loss is computed;
-            strings are automatically mapped to the pre-defined functions
-        :param **kwargs: kwargs for the torch loss function
-
-        pre_pro functions should take three arguments: predict, truth, **args.
-        predict and truth are the required parameters of every loss function,
-        while **args receives the extra keyword arguments passed in when the loss is called.
-        Each pre_pro function should return two values: the processed predict and truth.
-        '''
-        if loss_name is None:
-            # this is useful when Trainer.__init__ performs its type check
-            self._loss = None
-        else:
-            if not isinstance(loss_name, str):
-                raise NotImplementedError
-            else:
-                self._loss = self._get_loss(loss_name , **kwargs)
-        self.pre_pro = [f if callable(f) else method_dict.get(f) for f in pre_pro]
-
-    def add_pre_pro(self , func):
-        '''Add a pre_pro function.
-
-        :param func: a function or str, a method that reshapes the parameters before the loss is computed;
-            strings are automatically mapped to the pre-defined functions
-        '''
-        if not callable(func):
-            func = method_dict.get(func)
-            if func is None:
-                return
-        self.pre_pro.append(func)
-
-    @staticmethod
-    def _get_loss(loss_name , **kwargs):
-        '''Get a loss function from torch.
-
-        :param loss_name: str, the name of the loss function
-        :param **kwargs: kwargs for the torch loss function
-
-        :return: a callable loss function object
-        '''
-        loss_name = loss_name.strip().lower()
-        loss_name = "".join(loss_name.split("_"))
-        if len(loss_name) < 4 or loss_name[-4 : ] != "loss":
-            loss_name += "loss"
-        return loss_function_name[loss_name](**kwargs)
-
-    def get(self):
-        '''This method exists only so that some existing code keeps running without errors.
-        '''
-        return self
-
-    def __call__(self , predict , truth , **kwargs):
-        '''Call the loss function.
-        predict and truth are processed by the pre_pro methods in the order they were added.
-
-        :param predict : Tensor, model output
-        :param truth : Tensor, gold labels from the dataset
-        :param **kwargs : extra arguments passed to the pre_pro functions;
-            for example, if unpad_mask() is used in pre_pro, a kwarg named lens is required
-        '''
-        for f in self.pre_pro:
-            if f is None:
-                continue
-            predict , truth = f(predict , truth , **kwargs)
-        return self._loss(predict , truth)
@@ -0,0 +1,219 @@
+import torch
+
+
+class LossBase(object):
+    def __init__(self):
+        self.param_map = {}
+
+    def get_loss(self, *args, **kwargs):
+        raise NotImplementedError
+
+    def __call__(self, output_dict, predict_dict):
+        pass
+
+
+class Loss(LossBase):
+    def __init__(self):
+        pass
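+# NOTE (editor): the legacy `Loss` class defined near the bottom of this file
+# rebinds the name and therefore shadows the `Loss(LossBase)` stub above, so
+# `from fastNLP.core.losses import Loss` still resolves to the legacy
+# implementation while the LossBase API is being worked out.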
+
+
+def squash(predict, truth, **kwargs):
+    '''Reshape the tensors so that they fit the loss functions in PyTorch.
+
+    :param predict : Tensor, model output
+    :param truth : Tensor, gold labels from the dataset
+    :param **kwargs : extra arguments (unused)
+
+    :return predict , truth: predict & truth after processing
+    '''
+    return predict.view(-1, predict.size()[-1]), truth.view(-1, )
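+# Illustrative shapes (editor's sketch, not part of the original API):
+#     pred = torch.randn(2, 5, 7)         # [batch_size, max_len, tag_size]
+#     gold = torch.randint(0, 7, (2, 5))  # [batch_size, max_len]
+#     p, t = squash(pred, gold)           # p: [10, 7], t: [10]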
+
+
+def unpad(predict, truth, **kwargs):
+    '''Process padded sequence output to compute the true loss,
+    using the pack_padded_sequence() method.
+    The reshaping done by squash() is performed implicitly.
+
+    :param predict : Tensor, [batch_size , max_len , tag_size]
+    :param truth : Tensor, [batch_size , max_len]
+    :param **kwargs : extra arguments; kwargs["lens"] is expected to exist
+        kwargs["lens"] : list or LongTensor, [batch_size]
+            the i-th element is the true length of the i-th sequence
+
+    :return predict , truth: predict & truth after processing
+    '''
+    if kwargs.get("lens") is None:
+        return predict, truth
+    lens = torch.LongTensor(kwargs["lens"])
+    lens, idx = torch.sort(lens, descending=True)
+    predict = torch.nn.utils.rnn.pack_padded_sequence(predict[idx], lens, batch_first=True).data
+    truth = torch.nn.utils.rnn.pack_padded_sequence(truth[idx], lens, batch_first=True).data
+    return predict, truth
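+# Illustrative usage (editor's sketch): with lens=[3, 1] only the 4 real
+# time steps survive, so padded positions never contribute to the loss:
+#     pred = torch.randn(2, 3, 7)
+#     gold = torch.zeros(2, 3).long()
+#     p, t = unpad(pred, gold, lens=[3, 1])   # p: [4, 7], t: [4]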
+
+
+def unpad_mask(predict, truth, **kwargs):
+    '''Process padded sequence output to compute the true loss,
+    using the mask() method. squash() is applied internally.
+
+    :param predict : Tensor, [batch_size , max_len , tag_size]
+    :param truth : Tensor, [batch_size , max_len]
+    :param **kwargs : extra arguments; kwargs["lens"] is expected to exist
+        kwargs["lens"] : list or LongTensor, [batch_size]
+            the i-th element is the true length of the i-th sequence
+
+    :return predict , truth: predict & truth after processing
+    '''
+    if kwargs.get("lens") is None:
+        return predict, truth
+    mas = make_mask(kwargs["lens"], truth.size()[1])
+    return mask(predict, truth, mask=mas)
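+# Editor's note: unpad() and unpad_mask() keep the same set of elements and
+# differ only in mechanism (packing vs. boolean masking) and ordering, so for
+# elementwise losses such as CrossEntropyLoss they are interchangeable.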
+
+
+def mask(predict, truth, **kwargs):
+    '''Select specific elements from a Tensor. This method calls squash().
+
+    :param predict : Tensor, [batch_size , max_len , tag_size]
+    :param truth : Tensor, [batch_size , max_len]
+    :param **kwargs : extra arguments; kwargs["mask"] is expected to exist
+        kwargs["mask"] : ByteTensor, [batch_size , max_len]
+            the mask Tensor; positions marked with 1 are selected
+
+    :return predict , truth: predict & truth after processing
+    '''
+    if kwargs.get("mask") is None:
+        return predict, truth
+    mask = kwargs["mask"]
+    predict, truth = squash(predict, truth)
+    mask = mask.view(-1, )
+    predict = torch.masked_select(predict.permute(1, 0), mask).view(predict.size()[-1], -1).permute(1, 0)
+    truth = torch.masked_select(truth, mask)
+    return predict, truth
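+# Editor's note: the permute/view dance above relies on masked_select
+# broadcasting the flat [batch_size * max_len] mask across the tag dimension,
+# so whole rows of `predict` are kept. For example, with a mask selecting 4
+# positions out of a [2, 3, 7] prediction:
+#     p, t = mask(pred, gold, mask=make_mask([3, 1], 3))  # p: [4, 7], t: [4]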
+
+
+def make_mask(lens, tar_len):
+    '''Generate a mask that selects [:lens[i]] for the i-th element,
+    borrowed from fastNLP.models.sequence_modeling.seq_mask.
+
+    :param lens : list or LongTensor, [batch_size]
+    :param tar_len : int
+
+    :return mask : ByteTensor
+    '''
+    lens = torch.LongTensor(lens)
+    mask = [torch.ge(lens, i + 1) for i in range(tar_len)]
+    mask = torch.stack(mask, 1)
+    return mask
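+# Illustrative output (editor's sketch):
+#     make_mask([3, 1], 4)
+#     # -> [[1, 1, 1, 0],
+#     #     [1, 0, 0, 0]]   (uint8 on older torch, bool on newer versions)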
+
+
+# Map strings to functions, just for more convenient use.
+method_dict = {
+    "squash": squash,
+    "unpad": unpad,
+    "unpad_mask": unpad_mask,
+    "mask": mask,
+}
+
+loss_function_name = {
+    "L1Loss".lower(): torch.nn.L1Loss,
+    "BCELoss".lower(): torch.nn.BCELoss,
+    "MSELoss".lower(): torch.nn.MSELoss,
+    "NLLLoss".lower(): torch.nn.NLLLoss,
+    "KLDivLoss".lower(): torch.nn.KLDivLoss,
+    "NLLLoss2dLoss".lower(): torch.nn.NLLLoss2d,  # every key must end with "loss" (see _get_loss)
+    "SmoothL1Loss".lower(): torch.nn.SmoothL1Loss,
+    "SoftMarginLoss".lower(): torch.nn.SoftMarginLoss,
+    "PoissonNLLLoss".lower(): torch.nn.PoissonNLLLoss,
+    "MultiMarginLoss".lower(): torch.nn.MultiMarginLoss,
+    "CrossEntropyLoss".lower(): torch.nn.CrossEntropyLoss,
+    "BCEWithLogitsLoss".lower(): torch.nn.BCEWithLogitsLoss,
+    "MarginRankingLoss".lower(): torch.nn.MarginRankingLoss,
+    "TripletMarginLoss".lower(): torch.nn.TripletMarginLoss,
+    "HingeEmbeddingLoss".lower(): torch.nn.HingeEmbeddingLoss,
+    "CosineEmbeddingLoss".lower(): torch.nn.CosineEmbeddingLoss,
+    "MultiLabelMarginLoss".lower(): torch.nn.MultiLabelMarginLoss,
+    "MultiLabelSoftMarginLoss".lower(): torch.nn.MultiLabelSoftMarginLoss,
+}
+
+
+class Loss(object):
+    '''A Loss object is a callable object that wraps a loss function.
+    '''
+
+    def __init__(self, loss_name, pre_pro=None, **kwargs):
+        '''
+        :param loss_name: str or None, the name of the loss function
+        :param pre_pro : list of functions or str, methods that reshape the parameters before the loss is computed;
+            strings are automatically mapped to the pre-defined functions (defaults to [squash])
+        :param **kwargs: kwargs for the torch loss function
+
+        pre_pro functions should take three arguments: predict, truth, **args.
+        predict and truth are the required parameters of every loss function,
+        while **args receives the extra keyword arguments passed in when the loss is called.
+        Each pre_pro function should return two values: the processed predict and truth.
+        '''
+        if pre_pro is None:  # avoid a mutable default argument
+            pre_pro = [squash]
+        if loss_name is None:
+            # this is useful when Trainer.__init__ performs its type check
+            self._loss = None
+        else:
+            if not isinstance(loss_name, str):
+                raise NotImplementedError
+            else:
+                self._loss = self._get_loss(loss_name, **kwargs)
+        self.pre_pro = [f if callable(f) else method_dict.get(f) for f in pre_pro]
+
+    def add_pre_pro(self, func):
+        '''Add a pre_pro function.
+
+        :param func: a function or str, a method that reshapes the parameters before the loss is computed;
+            strings are automatically mapped to the pre-defined functions
+        '''
+        if not callable(func):
+            func = method_dict.get(func)
+            if func is None:
+                return
+        self.pre_pro.append(func)
+
+    @staticmethod
+    def _get_loss(loss_name, **kwargs):
+        '''Get a loss function from torch.
+
+        :param loss_name: str, the name of the loss function
+        :param **kwargs: kwargs for the torch loss function
+
+        :return: a callable loss function object
+        '''
+        loss_name = loss_name.strip().lower()
+        loss_name = "".join(loss_name.split("_"))
+        if len(loss_name) < 4 or loss_name[-4:] != "loss":
+            loss_name += "loss"
+        return loss_function_name[loss_name](**kwargs)
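+    # Name normalization examples (editor's note): strip, lowercase, drop
+    # underscores, then append "loss" if it is missing, so
+    #     "cross_entropy"   -> "crossentropyloss" -> torch.nn.CrossEntropyLoss
+    #     "BCE_With_Logits" -> "bcewithlogitsloss" -> torch.nn.BCEWithLogitsLoss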
+
+    def get(self):
+        '''This method exists only so that some existing code keeps running without errors.
+        '''
+        return self
+
+    def __call__(self, predict, truth, **kwargs):
+        '''Call the loss function.
+        predict and truth are processed by the pre_pro methods in the order they were added.
+
+        :param predict : Tensor, model output
+        :param truth : Tensor, gold labels from the dataset
+        :param **kwargs : extra arguments passed to the pre_pro functions;
+            for example, if unpad_mask() is used in pre_pro, a kwarg named lens is required
+        '''
+        for f in self.pre_pro:
+            if f is None:
+                continue
+            predict, truth = f(predict, truth, **kwargs)
+        return self._loss(predict, truth)
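+
+
+if __name__ == "__main__":
+    # Editor's smoke test (a sketch, not part of the original file): wrap
+    # CrossEntropyLoss for sequence tagging and drop the padded time steps.
+    pred = torch.randn(2, 5, 7)          # [batch_size, max_len, tag_size]
+    gold = torch.randint(0, 7, (2, 5))   # [batch_size, max_len]
+    loss_fn = Loss("cross_entropy", pre_pro=["unpad"])
+    print(loss_fn(pred, gold, lens=[5, 3]))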
@@ -1,27 +1,25 @@
+import itertools
+import os
 import time
-from datetime import timedelta
-from datetime import datetime
-import warnings
-from collections import defaultdict
-import os
-import itertools
-import shutil
+from datetime import datetime
+from datetime import timedelta
 
-from tensorboardX import SummaryWriter
 import torch
+from tensorboardX import SummaryWriter
 
 from fastNLP.core.batch import Batch
 from fastNLP.core.loss import Loss
 from fastNLP.core.metrics import Evaluator
 from fastNLP.core.optimizer import Optimizer
 from fastNLP.core.sampler import RandomSampler
 from fastNLP.core.sampler import SequentialSampler
 from fastNLP.core.tester import Tester
-from fastNLP.core.utils import _check_arg_dict_list
+from fastNLP.core.utils import _build_args
+from fastNLP.core.utils import _check_arg_dict_list
+from fastNLP.core.utils import _syn_model_data
+from fastNLP.core.utils import get_func_signature
 
 
 class Trainer(object):
     """Main Training Loop
@@ -1,9 +1,10 @@
+import math
 import unittest
-import fastNLP.core.loss as loss
-import math
 import torch as tc
 import pdb
+import fastNLP.core.losses as loss
 
 
 class TestLoss(unittest.TestCase):